data-raw/DATASET.R

library(tidyverse)
library(RTL)
library(RTLappStrat)

## dataset
crudepipelines <- RTL::getGIS(url = "https://www.eia.gov/maps/map_data/CrudeOil_Pipelines_US_EIA.zip")
refineries <- RTL::getGIS(url = "https://www.eia.gov/maps/map_data/Petroleum_Refineries_US_EIA.zip")
usethis::use_data(crudepipelines, overwrite = TRUE)
usethis::use_data(refineries, overwrite = TRUE)

## Raw toy prices dataset
prices <- RTL::fizdiffs %>%
  dplyr::filter(date >= "2018-12-01") %>%
  dplyr::select(grep("date|CMA|WCS|MSW|Mars",names(.))) %>%
  #dplyr::left_join(RTL::dfwide %>% dplyr::select(date,CL01)) %>%
  dplyr::arrange(date)

x <- RTL::dfwide %>%
  dplyr::select(grep("date|WCW|LSW",names(.))) %>%
  dplyr::select(grep("date|01|02|03",names(.))) %>%
  tidyr::drop_na() %>%
  dplyr::select(order(names(.)))

pricesinit <- prices %>%
  dplyr::left_join(x, by = c("date")) %>%
  tidyr::drop_na() %>%
  dplyr::left_join(RTL::tradeCycle %>%
                     dplyr::filter(market == "canada") %>%
                     dplyr::select(-flowmonth),
                   by = c("date" = "trade.cycle.end")) %>%
  dplyr::left_join(RTL::tradeCycle %>%
                     dplyr::filter(market == "usdomestic") %>%
                     dplyr::select(-flowmonth),
                   by = c("date" = "trade.cycle.end")) %>%
  dplyr::rename(wcwRoll = market.x, fizRoll = market.y) %>%
  dplyr::mutate(tcycleCad = purrr::pmap(.l = list(x = date, mkt = "canada"),.f = RTLappStrat::tcycle)) %>%
  dplyr::mutate(tcycleUs = purrr::pmap(.l = list(x = date, mkt = "usdomestic"),.f = RTLappStrat::tcycle)) %>%
  tidyr::unnest(tcycleCad) %>%
  tidyr::unnest(tcycleUs) %>%
  dplyr::mutate(WCS.HDY1 = WCS.HDY,
                WCS.HDY2 = WCS.HDY - (WCW01 - WCW02),
                MSW.EDM1 = MSW.EDM,
                MSW.EDM2 = MSW.EDM - (LSW01 - LSW02),
                WCS.CUS1 = WCS.CUS,
                WCS.CUS2 = WCS.CUS - (Mars.CLO01 - Mars.CLO02),
                WCS.HOU1 = WCS.HOU,
                WCS.HOU2 = WCS.HOU - (Mars.CLO01 - Mars.CLO02),
                tcycle = dplyr::case_when(tcycleCad == "tcycle" ~ "tcycle",
                                          (tcycleCad == "post" & tcycleUs == "tcycle") ~ "post",
                                          TRUE ~ "drop")) %>%
  dplyr::select(grep("eCad|eUs|Roll|mars",names(.),invert = TRUE))

pricesRaw <- pricesinit %>% dplyr::select(-4:-12) %>% dplyr::filter(tcycle != "drop")

usethis::use_data(pricesRaw, overwrite = TRUE)

rm(prices)

# Converted to monthly prices
## differentials use the tcyclePortion e.g. 75% to compute monthly price
## CMA price is as of the last day of WTI cash pricing
## add West Coast arb using RTL::OU()

tcyclePortion = 0.75

prices <- pricesRaw %>%
  dplyr::select(grep("CMA",names(.),invert = TRUE)) %>%
  tidyr::pivot_longer(cols = c(-date,-tcycle), names_to = "series", values_to = "value") %>%
  dplyr::group_by(series,tcycle) %>%
  tsibble::as_tsibble(key = series, index = date) %>%
  tsibble::group_by_key() %>%
  tsibble::index_by(freq = ~ yearmonth(.)) %>%
  dplyr::group_by(series, tcycle) %>%
  #dplyr::select(-date)
  dplyr::summarise(ave = mean(value), sdev = sd(value),.groups = c("keep")) %>%
  dplyr::mutate(date = as.Date(freq)) %>%
  dplyr::as_tibble() %>%
  dplyr::select(-freq, -sdev) %>%
  dplyr::group_by(series,date) %>%
  dplyr::mutate(tcycle = case_when(tcycle == "post" ~ (1-tcyclePortion),
                                   TRUE ~ tcyclePortion)) %>%
  dplyr::summarise(price = weighted.mean(x = ave,w = tcycle), .groups = "drop") %>%
  dplyr::group_by(series)

pricesCMA <- pricesRaw %>%
  dplyr::select(grep("date|CMA|tcycle",names(.),invert = FALSE)) %>%
  tidyr::pivot_longer(cols = c(-date,-tcycle), names_to = "series", values_to = "value") %>%
  dplyr::group_by(series,tcycle) %>%
  tsibble::as_tsibble(key = series, index = date) %>%
  tsibble::group_by_key() %>%
  tsibble::index_by(freq = ~ yearmonth(.)) %>%
  dplyr::group_by(series) %>%
  #dplyr::select(-date)
  dplyr::summarise(price = dplyr::last(value),.groups = c("keep")) %>%
  dplyr::mutate(date = as.Date(freq)) %>%
  dplyr::as_tibble() %>%
  dplyr::select(-freq) %>%
  dplyr::select(series, date, price) %>%
  dplyr::group_by(series)

prices <- rbind(prices, pricesCMA)
prices <- prices %>%
  tidyr::pivot_wider(names_from = series, values_from = price) %>%
  dplyr::mutate(WC = RTL::simOU(nsims = 1, S0 = 2, mu = 2, theta = 2, sigma = 3, T2M = 1/12 * (nrow(.) - 1), dt = 1 / 12)$sim1)

usethis::use_data(prices, overwrite = TRUE)

# historical economics

econs <- prices %>%
  dplyr::transmute(date = date,
                   WestCoast = WC,
                   EDM.STOR = (WTI.CMA02 + MSW.EDM2) - (WTI.CMA01 + MSW.EDM1),
                   HDY.STOR = (WTI.CMA02 + WCS.HDY2) - (WTI.CMA01 +  WCS.HDY1),
                   CUS.PIPE = (WTI.CMA02 + WCS.CUS2) - (WTI.CMA01 +  WCS.HDY1) - 5,
                   HOU.PIPE = (WTI.CMA02 + WCS.HOU2) - (WTI.CMA01 +  WCS.HDY1) - 5,
                   )

usethis::use_data(econs, overwrite = TRUE)

# charts for plotting on leaflet

p1 <- RTLappStrat::econs %>%
  tidyr::pivot_longer(-date, names_to = "series",values_to = "value") %>%
  dplyr::filter(series %in% c("WestCoast")) %>%
  ggplot2::ggplot(ggplot2::aes(x = date, y = value)) + ggplot2::geom_line(col = "blue") +
  labs(title = "WestCoast Arb",x = "", y = "$ per bbl")
pnt = sf::st_as_sf(data.frame(x = RTL::tradeHubs[6,] %>% dplyr::pull(long), y = RTL::tradeHubs[6,] %>% dplyr::pull(lat)),
                   coords = c("x", "y"),
                   crs = 4326)
econsCharts <- list(wc = list(p1,pnt))

p1 <- RTLappStrat::econs %>%
  tidyr::pivot_longer(-date, names_to = "series",values_to = "value") %>%
  dplyr::filter(series %in% c("EDM.STOR")) %>%
  ggplot2::ggplot(ggplot2::aes(x = date, y = value)) + ggplot2::geom_line(col = "blue") +
  labs(title = "Boxed Storage Econs",x = "", y = "$ per bbl")
pnt = sf::st_as_sf(data.frame(x = RTL::tradeHubs[1,] %>% dplyr::pull(long), y = RTL::tradeHubs[1,] %>% dplyr::pull(lat)),
                   coords = c("x", "y"),
                   crs = 4326)
econsCharts[["edm"]] <- list(p1,pnt)

p1 <- RTLappStrat::econs %>%
  tidyr::pivot_longer(-date, names_to = "series",values_to = "value") %>%
  dplyr::filter(series %in% c("HDY.STOR")) %>%
  ggplot2::ggplot(ggplot2::aes(x = date, y = value)) + ggplot2::geom_line(col = "blue") +
  labs(title = "Boxed Storage Econs",x = "", y = "$ per bbl")
pnt = sf::st_as_sf(data.frame(x = RTL::tradeHubs[2,] %>% dplyr::pull(long), y = RTL::tradeHubs[2,] %>% dplyr::pull(lat)),
                   coords = c("x", "y"),
                   crs = 4326)
econsCharts[["hdy"]] <- list(p1,pnt)

p1 <- RTLappStrat::econs %>%
  tidyr::pivot_longer(-date, names_to = "series",values_to = "value") %>%
  dplyr::filter(series %in% c("CUS.PIPE")) %>%
  ggplot2::ggplot(ggplot2::aes(x = date, y = value)) + ggplot2::geom_line(col = "blue") +
  labs(title = "WCS Pipe Econs to Cushing",x = "", y = "$ per bbl")
pnt = sf::st_as_sf(data.frame(x = RTL::tradeHubs[3,] %>% dplyr::pull(long), y = RTL::tradeHubs[3,] %>% dplyr::pull(lat)),
                   coords = c("x", "y"),
                   crs = 4326)
econsCharts[["cushing"]] <- list(p1,pnt)

p1 <- RTLappStrat::econs %>%
  tidyr::pivot_longer(-date, names_to = "series",values_to = "value") %>%
  dplyr::filter(series %in% c("HOU.PIPE")) %>%
  ggplot2::ggplot(ggplot2::aes(x = date, y = value)) + ggplot2::geom_line(col = "blue") +
  labs(title = "WCS Pipe Econs to USGC",x = "", y = "$ per bbl")
pnt = sf::st_as_sf(data.frame(x = RTL::tradeHubs[4,] %>% dplyr::pull(long), y = RTL::tradeHubs[4,] %>% dplyr::pull(lat)),
                   coords = c("x", "y"),
                   crs = 4326)
econsCharts[["houston"]] <- list(p1,pnt)
usethis::use_data(econsCharts, overwrite = TRUE)

# Assets Description

assets <- tibble::tribble(
  ~inputId, ~ min, ~max, ~value, ~step, ~ formula, ~ expReturn, ~payoff,
  "wc", 3, 10, 5, 0.1, "WC", 1, "pmax(.x, 0)",
  "edm", 1, 2, 1.20, 0.1, "(WTI.CMA02 + MSW.EDM2) - (WTI.CMA01 + MSW.EDM1)", 1.25, "pmax(-.x, 0)",
  "hdy", 1, 2, 1.20, 0.1, "(WTI.CMA02 + WCS.HDY2) - (WTI.CMA01 + WCS.HDY1)", 1.5, "pmax(-.x, 0)",
  "usgc", 3, 10, 5, 0.1,  "(WTI.CMA02 + WCS.HOU2) - (WTI.CMA01 + WCS.HDY1) - 5.5", 2, "pmax(.x, 0)"
)

usethis::use_data(assets, overwrite = TRUE)

# Multivariate sims

#RTL::simMultivariates(nsims = 5000, x = RTLappStrat::prices, s0 = NULL)$sims
#usethis::use_data(pricesSim, overwrite = TRUE)



## testing
risktoollib/RTLappStrat documentation built on Sept. 11, 2022, 10:27 p.m.