# inst/doc/Fetching-historical-index-data.R

## ----include = FALSE----------------------------------------------------------
# Chunk defaults for the whole vignette: fold each chunk's source and output
# into a single block, prefix printed output with "#>", and render figures at
# the full available width.
knitr::opts_chunk$set(out.width = "100%", comment = "#>", collapse = TRUE)

## ----load-data, echo=FALSE----------------------------------------------------
# Restore the pre-saved objects so that building the vignette needs no network
# access (for example during CRAN checks).
# See the vignette creation script in data-raw/vignettes/save_data.R
load(file = "data_indexes.RData")

## ----setup, message=FALSE, warning=FALSE--------------------------------------
library(rb3)
library(dplyr)
library(ggplot2)
library(tidyr)
library(stringr)
library(lubridate)

## ----get-indexes, eval=FALSE--------------------------------------------------
# # Get all available indices
# indexes <- indexes_get()
# head(indexes)

## ----show-indexes, echo=FALSE-------------------------------------------------
# Display a subset of available indices for demonstration.
# NOTE(review): `indexes` is not assigned by any evaluated code in this script
# (the indexes_get() call is commented out); presumably it is restored by the
# load() call in the load-data chunk -- confirm.
head(indexes)

## ----fetch-historical-data, eval=FALSE----------------------------------------
# # Download historical data for specific indices across multiple years
# fetch_marketdata("b3-indexes-historical-data",
#   throttle = TRUE,
#   index = c("IBOV", "SMLL", "IDIV"),
#   year = 2018:2023
# )

## ----get-historical-data, eval=FALSE------------------------------------------
# # Get the historical data for analysis
# index_history <- indexes_historical_data_get() |>
#   filter(
#     symbol %in% c("IBOV", "SMLL", "IDIV"),
#     refdate >= "2018-01-01"
#   ) |>
#   collect()

## ----plot-performance, fig.width=8, fig.height=5, fig.cap="Historical Performance of B3 Indices (2018-2023)"----
# Calculate the normalized performance (setting the starting point to 100).
#
# Rows are sorted by `refdate`, so within each `symbol` group first(value) is
# the earliest observation and every value is expressed relative to it.
index_performance <- index_history |>
  group_by(symbol) |>
  arrange(refdate) |>
  mutate(
    norm_value = value / first(value) * 100
  ) |>
  # Drop the grouping so the result does not silently carry `symbol` groups
  # into whatever consumes it next.
  ungroup()

# Line chart of the rebased series: one coloured line per index, x-axis ticks
# every six months with slanted "Mon YYYY" labels.
ggplot(index_performance) +
  geom_line(
    aes(x = refdate, y = norm_value, color = symbol),
    linewidth = 1
  ) +
  scale_x_date(date_breaks = "6 months", date_labels = "%b %Y") +
  labs(
    x = "Date",
    y = "Normalized Value",
    color = "Index",
    title = "Comparative Performance of B3 Indices",
    subtitle = "Normalized values (starting at 100)"
  ) +
  # theme_light() must precede the axis tweak, otherwise it would reset it.
  theme_light() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

## ----fetch-composition, eval=FALSE--------------------------------------------
# # Download index composition data
# fetch_marketdata("b3-indexes-composition")

## ----get-composition, eval=FALSE----------------------------------------------
# # Get the composition data
# composition <- indexes_composition_get() |>
#   collect()

## ----view-composition---------------------------------------------------------
# Display a subset of the composition data.
# NOTE(review): `composition` is not assigned by any evaluated code in this
# script (the indexes_composition_get() call is commented out); presumably it
# is restored by the load() call in the load-data chunk -- confirm.
head(composition)

## ----stocks-in-indices, eval=FALSE--------------------------------------------
# # Get stocks in specific indices
# selected_indices <- c("IBOV", "SMLL", "IDIV")
# 
# # Find stocks in each index
# stocks_by_index <- lapply(selected_indices, function(idx) {
#   composition |>
#     filter(update_date == latest_date, str_detect(indexes, idx)) |>
#     pull(symbol)
# })
# names(stocks_by_index) <- selected_indices

## ----stocks-intersection, fig.width=7, fig.height=5, fig.cap="Overlapping Stocks Between Major B3 Indices"----
# Count how many stocks fall into each region of the three-way overlap between
# IBOV, SMLL and IDIV. The result feeds the overlap bar chart.
index_overlaps <- local({
  ibov <- stocks_by_index$IBOV
  smll <- stocks_by_index$SMLL
  idiv <- stocks_by_index$IDIV

  # Members of `x` that appear in neither `y` nor `z`.
  only_in <- function(x, y, z) setdiff(setdiff(x, y), z)
  # Members shared by `x` and `y` that are absent from `z`.
  pair_only <- function(x, y, z) intersect(setdiff(x, z), y)

  data.frame(
    Index = c(
      "IBOV only", "SMLL only", "IDIV only",
      "IBOV & SMLL", "IBOV & IDIV", "SMLL & IDIV",
      "All three indices"
    ),
    Count = c(
      length(only_in(ibov, smll, idiv)),
      length(only_in(smll, ibov, idiv)),
      length(only_in(idiv, ibov, smll)),
      length(pair_only(ibov, smll, idiv)),
      length(pair_only(ibov, idiv, smll)),
      length(pair_only(smll, idiv, ibov)),
      length(Reduce(intersect, stocks_by_index))
    )
  )
})

# Horizontal bar chart of the overlap counts, bars ordered by count and each
# labelled with its value.
ggplot(index_overlaps, aes(x = reorder(Index, Count), y = Count)) +
  geom_col(fill = "steelblue") +
  geom_text(aes(label = Count), hjust = -0.2) +
  coord_flip() +
  labs(
    x = NULL,
    y = "Number of Stocks",
    title = "Stock Overlap Between Major B3 Indices",
    subtitle = paste("As of", latest_date)
  ) +
  theme_light()

## ----indices-for-stock, eval=TRUE---------------------------------------------
# Look up which indices include one stock on a given composition date.
#
# stock_symbol: value compared against the `symbol` column.
# comp_data:    table with `update_date`, `symbol` and `indexes` columns.
# date:         value compared against the `update_date` column.
#
# Returns the sorted vector obtained by splitting the `indexes` entry of every
# matching row on ",".
find_indices_for_stock <- function(stock_symbol, comp_data, date) {
  matching_rows <- filter(
    comp_data,
    update_date == date,
    symbol == stock_symbol
  )
  index_strings <- pull(matching_rows, indexes)
  sort(unlist(str_split(index_strings, ",")))
}

# Example: Find indices containing PETR4.
# NOTE(review): `composition` and `latest_date` are not assigned by any
# evaluated code in this script; presumably both are restored by the load()
# call in the load-data chunk -- confirm.
petr4_indices <- find_indices_for_stock("PETR4", composition, latest_date)

## ----show-indices-for-stock, echo=FALSE---------------------------------------
# Display the indices containing the example stock (auto-printed).
petr4_indices

## ----fetch-theoretical, eval=FALSE--------------------------------------------
# # Download theoretical portfolio data
# fetch_marketdata("b3-indexes-theoretical-portfolio", index = c("IBOV", "SMLL", "IDIV"))

## ----get-theoretical, eval=FALSE----------------------------------------------
# # Get the theoretical portfolio data
# theoretical <- indexes_theoretical_portfolio_get() |>
#   collect()
# 
# # Get the latest date for each index
# latest_dates <- theoretical |>
#   group_by(index) |>
#   summarise(latest = max(refdate))

## ----top-constituents, fig.width=8, fig.height=6, fig.cap="Top 10 Constituents by Weight in Ibovespa"----
# The ten heaviest IBOV constituents on IBOV's latest reference date.
ibov_top10 <- local({
  ibov_date <- latest_dates$latest[latest_dates$index == "IBOV"]
  ibov_rows <- filter(theoretical, index == "IBOV", refdate == ibov_date)
  slice_head(arrange(ibov_rows, desc(weight)), n = 10)
})

# Horizontal bar chart of the top constituents, bars ordered by weight and
# labelled with the weight formatted as a percentage.
ggplot(ibov_top10, aes(x = reorder(symbol, weight), y = weight)) +
  geom_col(fill = "darkblue") +
  geom_text(
    aes(label = scales::percent(weight, accuracy = 0.01)),
    hjust = -0.2
  ) +
  coord_flip() +
  scale_y_continuous(labels = scales::percent) +
  labs(
    x = NULL,
    y = "Weight (%)",
    title = "Top 10 Constituents by Weight in Ibovespa",
    subtitle = paste(
      "As of",
      latest_dates$latest[latest_dates$index == "IBOV"]
    )
  ) +
  theme_light()

## ----index-concentration, fig.width=8, fig.height=5, fig.cap="Weight Concentration in B3 Indices"----
# Calculate cumulative weights for different indices.
#
# For each index, take its constituents on that index's latest reference date,
# sort them by descending weight, and add up the weight of the largest 1, 5,
# 10 and 20 names plus the total over all constituents. `concentration_data`
# is a list named by index; `concentration_df` stacks it into one data frame.
concentration_data <- lapply(
  c(IBOV = "IBOV", SMLL = "SMLL", IDIV = "IDIV"),
  function(idx) {
    latest <- latest_dates$latest[latest_dates$index == idx]

    index_weights <- theoretical |>
      filter(index == idx, refdate == latest) |>
      arrange(desc(weight))
    sorted_weights <- index_weights$weight
    cutoffs <- c(1, 5, 10, 20)

    data.frame(
      index = idx,
      top_n = c(cutoffs, nrow(index_weights)),
      cum_weight = c(
        # head() stops at the end of the vector, whereas `x[1:k]` pads with NA
        # when fewer than k constituents exist and turns the sum into NA.
        vapply(
          cutoffs,
          function(k) sum(as.numeric(head(sorted_weights, k))),
          numeric(1)
        ),
        sum(sorted_weights)
      )
    )
  }
)

concentration_df <- bind_rows(concentration_data)

# Keep only the fixed top-N cut-offs and attach a readable label column.
concentration_plot <- mutate(
  filter(concentration_df, top_n %in% c(1, 5, 10, 20)),
  top_n_label = paste("Top", top_n)
)

# Grouped bar chart: one cluster per index, one bar per top-N cut-off.
ggplot(
  concentration_plot,
  aes(x = index, y = cum_weight, fill = factor(top_n))
) +
  geom_col(position = "dodge") +
  scale_y_continuous(labels = scales::percent) +
  scale_fill_brewer(
    palette = "Blues",
    labels = c("Top 1", "Top 5", "Top 10", "Top 20")
  ) +
  labs(
    x = NULL,
    y = "Cumulative Weight",
    fill = "Number of Stocks",
    title = "Index Concentration Analysis",
    subtitle = "Cumulative weight of top constituents"
  ) +
  theme_light()

## ----fetch-current, eval=FALSE------------------------------------------------
# # Download current portfolio data
# fetch_marketdata("b3-indexes-current-portfolio", index = c("IBOV", "SMLL", "IDIV"))

## ----get-current, eval=FALSE--------------------------------------------------
# # Get the current portfolio data
# current <- indexes_current_portfolio_get() |>
#   collect()
# 
# # Get the latest date for each index
# current_latest <- current |>
#   group_by(index) |>
#   summarise(latest = max(refdate))

## ----sector-composition, fig.width=9, fig.height=6, fig.cap="Sector Breakdown of B3 Indices"----
# Per-index sector breakdown: on each index's latest reference date, sum the
# constituent weights by sector and order sectors from heaviest to lightest.
# `sector_data` is a list named by index; `sector_df` stacks it.
sector_data <- lapply(
  c(IBOV = "IBOV", SMLL = "SMLL", IDIV = "IDIV"),
  function(idx) {
    latest <- current_latest$latest[current_latest$index == idx]
    latest_rows <- filter(current, index == idx, refdate == latest)
    by_sector <- summarise(group_by(latest_rows, sector), weight = sum(weight))
    mutate(arrange(by_sector, desc(weight)), index = idx)
  }
)

sector_df <- bind_rows(sector_data)

# Stacked horizontal bars: one bar per index, segments coloured by sector,
# legend placed underneath the plot.
ggplot(sector_df, aes(x = index, y = weight, fill = sector)) +
  geom_col() +
  coord_flip() +
  scale_y_continuous(labels = scales::percent) +
  labs(
    x = NULL,
    y = "Weight",
    fill = "Sector",
    title = "Sector Composition of B3 Indices",
    subtitle = paste("As of", format(max(current_latest$latest), "%b %Y"))
  ) +
  # theme_light() must precede the legend tweak, otherwise it would reset it.
  theme_light() +
  theme(legend.position = "bottom", legend.box = "horizontal")

## ----helper-functions, eval=FALSE---------------------------------------------
# # Function to get assets in specific indices
# indexes_assets_by_indexes <- function(index_list) {
#   last_date <- indexes_composition_get() |>
#     summarise(update_date = max(update_date)) |>
#     collect() |>
#     pull(update_date)
# 
#   x <- lapply(index_list, function(idx) {
#     indexes_composition_get() |>
#       filter(update_date == last_date, str_detect(indexes, idx)) |>
#       select(symbol) |>
#       collect() |>
#       pull(symbol)
#   })
#   stats::setNames(x, index_list)
# }
# 
# # Function to find which indices contain specific assets
# indexes_indexes_by_assets <- function(symbols) {
#   last_date <- indexes_composition_get() |>
#     summarise(update_date = max(update_date)) |>
#     collect() |>
#     pull(update_date)
# 
#   indexes_composition_get() |>
#     filter(update_date == last_date, symbol %in% symbols) |>
#     select(symbol, indexes) |>
#     collect() |>
#     mutate(
#       indexes_list = str_split(indexes, ",")
#     )
# }

## ----performance-metrics, fig.width=8, fig.height=5, fig.cap="Monthly Returns of B3 Indices"----
# Calculate monthly returns.
#
# A monthly return compares one month-end value with the previous month-end
# value, so the series is first collapsed to a single observation per index
# and month (the value on the latest `refdate` in that month) and only then
# differenced. Taking value / lag(value) on the raw rows and keeping the last
# one per month would yield the final day-over-day return of each month
# instead.
#
# Result: one row per `symbol` and `year_month` with `monthly_return`; the
# first available month of each index has no predecessor and is dropped.
monthly_returns <- index_history |>
  mutate(year_month = floor_date(refdate, "month")) |>
  group_by(symbol, year_month) |>
  summarise(
    month_end_value = last(value, order_by = refdate),
    .groups = "drop_last"
  ) |>
  # Still grouped by `symbol` here, so lag() never reaches across indices.
  mutate(monthly_return = month_end_value / lag(month_end_value) - 1) |>
  ungroup() |>
  filter(!is.na(monthly_return)) |>
  select(symbol, year_month, monthly_return)

# One stacked panel per index showing its return series as bars; the legend is
# hidden because the panel titles already name the index.
ggplot(
  monthly_returns,
  aes(x = year_month, y = monthly_return, fill = symbol)
) +
  geom_col(position = "dodge") +
  facet_wrap(~symbol, ncol = 1) +
  scale_y_continuous(labels = scales::percent) +
  labs(
    x = NULL,
    y = "Monthly Return",
    title = "Monthly Returns of B3 Indices"
  ) +
  # theme_light() must precede the legend tweak, otherwise it would reset it.
  theme_light() +
  theme(legend.position = "none")

## ----summary-stats------------------------------------------------------------
# Per-index summary of the return series, followed by annualized figures.
# The return is annualized by compounding the mean over 12 periods, the
# volatility by scaling the standard deviation with sqrt(12). `Sharpe` is the
# plain ratio of the two (no risk-free rate is subtracted). `Positive` is the
# share of periods with a return above zero.
performance_summary <- monthly_returns |>
  group_by(symbol) |>
  summarise(
    Mean = mean(monthly_return, na.rm = TRUE),
    Median = median(monthly_return, na.rm = TRUE),
    Std.Dev = sd(monthly_return, na.rm = TRUE),
    Min = min(monthly_return, na.rm = TRUE),
    Max = max(monthly_return, na.rm = TRUE),
    Positive = mean(monthly_return > 0, na.rm = TRUE),
    # Later entries may refer to the per-group summaries defined above.
    Annualized.Return = (1 + Mean)^12 - 1,
    Annualized.Volatility = Std.Dev * sqrt(12),
    Sharpe = Annualized.Return / Annualized.Volatility
  )

# Print a presentation table: format the percentages, round the ratio, then
# keep and rename only the columns meant for display.
performance_summary |>
  mutate(
    Annualized.Return = scales::percent(Annualized.Return, accuracy = 0.01),
    Annualized.Volatility = scales::percent(
      Annualized.Volatility,
      accuracy = 0.01
    ),
    Sharpe = round(Sharpe, 2),
    Positive = scales::percent(Positive, accuracy = 0.1)
  ) |>
  select(
    Index = symbol,
    `Ann. Return` = Annualized.Return,
    `Ann. Volatility` = Annualized.Volatility,
    `Sharpe Ratio` = Sharpe,
    `% Positive Months` = Positive
  )

# Try the rb3 package in your browser
#
# Any scripts or data that you put into this service are public.
#
# rb3 documentation built on Aug. 8, 2025, 6:20 p.m.