## -----------------------------------------------------------------------------
# FUNCTIONS FOR DOWNLOADING AND PARSING SPDR ETFs FILEs
## -----------------------------------------------------------------------------
test_unit <- function(n_test){
n_test <- 1
if(n_test == 1){
aod <- "20190912"
# download_all_etf_SPDR(aod)
file_name <- glue::glue("ETF/data_SPDR/summary/SPDR_summary_{aod}.xlsx")
c(info_df, exchanges_df, perfo_df) %<-% read_SPDR_summary(file_name)
}
} # test_unit
download_summary_SPDR <- function(aod){
# DESCRIPTION: download the excel file which summarizes all the SPDR etfs. It can
# be found at the following link: https://it.spdrs.com/en/professional/product/index.seam
# INPUT:
# aod =
# OUTPUT:
# aod =
SPDR_path <- glue::glue("https://it.spdrs.com/library-content/public/spdr-product-data-eu-en-{aod}.xlsx")
file_name <- glue::glue("ETF/data_SPDR/summary/SPDR_summary_{aod}.xlsx")
if(file.exists(file_name)){
flog.info(glue::glue("File {file_name} is already present. I'll use this one! : {aod}"))
return(aod)
}
response <- httr::GET(SPDR_path, write_disk(file_name, overwrite=TRUE))
} # download_all_etf_SPDR
read_SPDR_summary <- function(file_name){
# DESCRIPTION: we're going to divide the excel file in blocks. A block is defined
# as a rectangular piece of the file excel.
# INPUT:
# file_name =
# OUTPUT:
## ---------------------------------------------------------------------------
# 0) READ FILE INTO A TIDY EXCEL DATAFRAME
## ---------------------------------------------------------------------------
# file_name <- glue::glue("ETF/data_SPDR/summary/SPDR_summary_{aod}.xlsx") # to be given as input
tidy_xl <- tidyxl::xlsx_cells(path = file_name)
max_row <- tidy_xl %>%
filter(!is_blank) %>%
filter(stringr::str_detect(character,"Source:" )) %>%
pull(row)
## ---------------------------------------------------------------------------
# 1) BLOCK GENERAL INFORMATION
## ---------------------------------------------------------------------------
start_row_general_info <- 3
start_col_general_info <- 1
last_col_general_info <- tidy_xl %>%
filter(character == "Settlement Cycle") %>%
pull(col)
headers_general_info <- tidy_xl %>%
filter(between(col, start_col_general_info, last_col_general_info)) %>%
filter(row == start_row_general_info) %>%
pull(character)
stopifnot(length(headers_general_info) == last_col_general_info)
headers_general_info %>%
purrr::map_dfc(function(x){
tibble(!!x := get_col_char_single_header(tidy_xl, x, max_row))
}) -> general_info_df
## ---------------------------------------------------------------------------
# 2) BLOCK EXCHANGES
## ---------------------------------------------------------------------------
start_col_exchanges <- last_col_general_info + 1
last_col_exchanges <- tidy_xl %>%
filter(character == "Bid/Offer Spread") %>%
pull(col) - 1
exchanges_tidy_xl <- tidy_xl %>%
filter(between(col, start_col_exchanges, last_col_exchanges),
between(row, start_row_general_info, max_row))
exchanges <- c("London Stock Exchange", "Deutsche B?rse", "SIX Swiss Exchange",
"Borsa Italiana", "Euronext Paris", "Euronext Amsterdam")
start_col_single_exchanges <- exchanges_tidy_xl %>%
filter(character %in% exchanges) %>%
pull(col)
end_col_single_exchanges <- c(start_col_single_exchanges[-1] - 1, last_col_exchanges)
pmap_dfr(list(start_col_single_exchanges, end_col_single_exchanges, exchanges), function(x,y,z){
all_cells <- exchanges_tidy_xl %>%
filter(!is_blank) %>%
filter(between(col,x,y), between(row, start_row_general_info+1, max_row-1)) %>%
select(row, col, data_type, character, numeric)
first_header_row <- all_cells %>%
filter(row == start_row_general_info+1) %>%
select(row, col, currency = character)
second_header_row <- all_cells %>%
filter(row == start_row_general_info+2) %>%
select(row, col, ticker_type = character)
data_cells <- all_cells %>%
filter(row >= start_row_general_info+3) %>%
select(row, col, value = character)
data_cells %>%
enhead(first_header_row, "NNW") %>%
enhead(second_header_row, "N") %>%
# select(-row, -col) %>%
mutate(exchange = z) %>%
select(col, row, value, ticker_type, currency, exchange)
}) -> exchanges_df
## ---------------------------------------------------------------------------
# 2) BLOCK NAV INFORMATION
## ---------------------------------------------------------------------------
start_col_nav_info <- last_col_exchanges + 1
last_col_nav_info <- tidy_xl %>%
filter(character == "Perf as of Date") %>%
pull(col)
headers_nav_info <- tidy_xl %>%
filter(between(col, start_col_nav_info, last_col_nav_info)) %>%
filter(row == start_row_general_info) %>%
pull(character)
headers_nav_info %>%
# str_replace_all("[^[:alnum:]]", " ") %>%
# str_trim() %>%
# stringr::str_replace_all(fixed(" "), "_") %>%
purrr::map_dfc(function(x){
tibble(!!x := get_col_char_single_header(tidy_xl, x, max_row))
}) -> nav_info_df
general_info_df <- bind_cols(general_info_df, nav_info_df)
## ---------------------------------------------------------------------------
# 4) BLOCK PERFO
## ---------------------------------------------------------------------------
start_col_perfo <- last_col_nav_info + 1
last_col_perfo <- tidy_xl %>%
filter(character == "Fund Characteristics as of Date") %>%
pull(col) - 1
all_cells <- tidy_xl %>%
filter(!is_blank) %>%
filter(between(col, start_col_perfo, last_col_perfo)) %>%
filter(between(row, start_row_general_info, max_row)) %>%
select(row, col, data_type, character, numeric)
all_cells[is.na(all_cells$character), "character"] <- all_cells[is.na(all_cells$character), "numeric"]
first_header_row <- all_cells %>%
filter(row == start_row_general_info) %>%
select(row, col, perfo_type = character)
second_header_row <- all_cells %>%
filter(row == start_row_general_info+2) %>%
select(row, col, period = character)
data_cells <- all_cells %>%
filter(row >= start_row_general_info+3) %>%
select(row, col, value = character)
data_cells %>%
enhead(first_header_row, "NNW") %>%
enhead(second_header_row, "N") %>%
select(col, row, value, period, perfo_type) -> perfo_df
## ---------------------------------------------------------------------------
# 5) FUND CHARACTERISTICS
## ---------------------------------------------------------------------------
start_col_fund_charact <- last_col_perfo + 2
last_col_fund_charact <- tidy_xl %>%
filter(character == "Price/Earnings Ratio FY1") %>%
pull(col)
headers_fund_charact <- tidy_xl %>%
filter(between(col, start_col_fund_charact, last_col_fund_charact)) %>%
filter(row == start_row_general_info+2) %>%
pull(character)
fund_charact_aod_df <- tibble(
`Fund Characteristics as of Date` = get_col_char_single_header(tidy_xl, "Fund Characteristics as of Date", max_row)
)
headers_fund_charact %>%
# str_replace_all("[^[:alnum:]]", " ") %>%
# str_trim() %>%
# stringr::str_replace_all(fixed(" "), "_") %>%
purrr::map_dfc(function(x){
tibble(!!x := get_col_char_single_header(tidy_xl, x, max_row))
}) -> fund_charact_df
fund_charact_df <- bind_cols(fund_charact_aod_df, fund_charact_df)
## -------------------------------------------------------------------------
# 6) FINAL BINDING
## -------------------------------------------------------------------------
info_df <- bind_cols(general_info_df, fund_charact_df)
mapping_df <- tidy_xl %>%
filter(character %in% info_df$`Fund Name`) %>%
select(row, character)
exchanges_df <- exchanges_df %>%
left_join(mapping_df, by = "row")
perfo_df <- perfo_df %>%
left_join(mapping_df, by = "row")
## -------------------------------------------------------------------------
# 7) COLUMN PARSING
## -------------------------------------------------------------------------
return(list(
info_df = info_df,
exchanges_df = exchanges_df,
perfo_df = perfo_df
))
} # read_SPDR_summary
get_col_char_single_header <- function(tidy_xl, col_names, max_row){
# DESCRIPTION: function for reading the content of a char column by column name
# or by
# INPUT:
# OUTPUT:
n_cols <- tidy_xl %>%
filter(character %in% col_names) %>%
pull(col)
start_row <- tidy_xl %>%
filter(character %in% col_names) %>%
pull(row) %>%
unique()
stopifnot(length(start_row) == 1)
tidy_xl %>%
filter(col %in% n_cols,
!is.na(character),
row > start_row,
row < max_row) %>%
pull(character)
} # get_column_single_header
parse_summary_SPDR <- function(info_df, exchanges_df, perfo_df){
# DESCRIPTION:
# INPUT:
# OUTPUT:
perfo_df$value <- str_replace(perfo_df$value, "-",NA_character_) %>%
parse_number()/100 # there numbers are all percentages
## -------------------------------------------------------------------------
# 1) PARSE INFO_DF
## -------------------------------------------------------------------------
# colnames(info_df) <- parse_col_name(colnames(info_df))
info_df <- info_df %>%
`colnames<-`(parse_col_name(colnames(.))) %>%
mutate_all(~replace(., . == "-", NA_character_)) %>%
mutate_all(~str_replace_all(., "[^([:alnum:]|' '| ,)]", "")) %>%
mutate_all(readr::parse_guess)
parse_guess(info_df$Effective_Convexity)
## -------------------------------------------------------------------------
# 2) PARSE EXCHANGE_DF
## -------------------------------------------------------------------------
## -------------------------------------------------------------------------
# 1) PARSE PERFO_DF
## -------------------------------------------------------------------------
} # parse_summary_SPDR
parse_col_name <- function(col_names){
# DESCRIPTION:
# INPUT:
# OUTPUT:
col_names %>%
str_replace_all("[^[:alnum:]]", " ") %>%
str_trim() %>%
stringr::str_replace_all(fixed(" "), "_") %>%
stringr::str_replace_all(fixed("__"), "_")
} # parse_col_name
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.