R/utils.R

Defines functions validate_funs get_merval get_ipsa get_ipc get_ibov get_tsx get_taiex get_kospi get_nzx50 get_bursa get_jakarta get_sensex get_hangseng get_topix get_bel20 get_cac40 get_dax get_ftse100 get_russell2000 get_amex get_nyse get_dow get_sp500

Documented in get_amex get_bel20 get_bursa get_cac40 get_dax get_dow get_ftse100 get_hangseng get_ibov get_ipc get_ipsa get_jakarta get_kospi get_merval get_nyse get_nzx50 get_russell2000 get_sensex get_sp500 get_taiex get_topix get_tsx validate_funs

#' A collection of utility functions
#'
#' A collection of utility functions to get market data from the Web.
#'
#' @details NULL
#'
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#'
#' @name utils
NULL


# Web-scrapping functions -------------------------------------------------

#' @rdname utils
get_sp500 <- function() {

  path <- "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"

  message(
    "Yahoo does not provide the S&P 500 components. Tickers been downloaded from: ",
    crayon::cyan("en.wikipedia.org/wiki/List_of_S%26P_500_companies"),
    "."
    )

  # rvest functions: Get table of stocks
  insistently_read_html <- purrr::insistently(f = xml2::read_html)

  path %>%
    insistently_read_html() %>%
    rvest::html_node(css = "table.wikitable") %>%
    rvest::html_table(., fill = TRUE) %>%
    #vctrs::vec_as_names_legacy(.) %>%
    dplyr::rename(tickers = "Symbol") %>%
    dplyr::select(.data$tickers) %>%
    dplyr::as_tibble()

}


#' @rdname utils
get_dow <- function() {

  #path <- "https://finance.yahoo.com/quote/%5EDJI/components?p=%5EDJI"

  # insistently_read_html <- purrr::insistently(f = xml2::read_html)
  #
  # path %>%
  #   insistently_read_html() %>%
  #   rvest::html_node(css = "table") %>%
  #   rvest::html_table() %>%
  #   data.frame() %>%
  #   dplyr::as_tibble() %>%
  #   dplyr::rename(tickers = .data$Symbol) %>%
  #   dplyr::select(.data$tickers)

  tibble::tibble(tickers = c("MMM", "AXP", "AMGN", "AAPL", "BA", "CAT", "CVX", "CSCO",
                             "KO", "DOW", "GS", "HD", "HON", "IBM", "INTC", "JNJ", "JPM",
                             "MCD", "MRK", "MSFT", "NKE", "PG", "CRM", "TRV", "UNH", "VZ",
                             "V", "WBA", "WMT", "DIS")
                 )

}


# @rdname utils
# get_nasdaq <- function() {
#
#   ## TODO fix this code
#   #path <- "https://www.nasdaq.com/screening/companies-by-industry.aspx?exchange##=NASDAQ&render=download"
#
#   message(
#     "Yahoo does not provide the NASDAQ components. Tickers been downloaded from: ",
#     crayon::cyan("www.nasdaq.com"),
#     "."
#     )
#
#   suppressMessages(
#     suppressWarnings(
#       readr::read_csv(file = path)
#     )
#   ) %>%
#     dplyr::rename(tickers = .data$Symbol) %>%
#     dplyr::select(.data$tickers)
#
# }


#' @rdname utils
get_nyse <- function() {

  path <- "https://money.cnn.com/data/markets/nyse/?page="
  pages <- 1:128
  paths <- stringr::str_c(path, pages)

  message(
    "Yahoo does not provide the NYSE components. Tickers been downloaded from: ",
    crayon::cyan("money.cnn.com/data/markets/nyse"),
    "."
    )

  map_paths <- function(paths) {

    insistently_read_html <- purrr::insistently(f = xml2::read_html)

    paths %>%
      insistently_read_html() %>%
      rvest::html_nodes(css = "table") %>%
      rvest::html_table() %>%
      purrr::chuck(4) %>%
      dplyr::select(.data$Company) %>%
      tidyr::separate(.data$Company, into = c("tickers", "company"), extra = "drop") %>%
      dplyr::select(.data$tickers) %>%
      dplyr::as_tibble()

  }

  purrr::map_df(.x = paths, .f = map_paths)

}


#' @rdname utils
get_amex <- function() {

  path <- "https://money.cnn.com/data/markets/amex/?page="
  pages <- 1:13
  paths <- stringr::str_c(path, pages)

  message(
    "Yahoo does not provide the AMEX components. Tickers been downloaded from: ",
    crayon::cyan("money.cnn.com/data/markets/amex")
  )

  map_paths <- function(paths) {

    insistently_read_html <- purrr::insistently(f = xml2::read_html)

    paths %>%
      insistently_read_html() %>%
      rvest::html_nodes(css = "table") %>%
      rvest::html_table() %>%
      purrr::chuck(4) %>%
      dplyr::select(.data$Company) %>%
      tidyr::separate(.data$Company, into = c("tickers", "company"), extra = "drop") %>%
      dplyr::select(.data$tickers) %>%
      dplyr::as_tibble()

  }

  purrr::map_df(.x = paths, .f = map_paths)

}


#' @rdname utils
get_russell2000 <- function() {

  path <- "https://money.cnn.com/data/markets/russell/?page="
  pages <- 1:71
  paths <- stringr::str_c(path, pages)

  message(
    "Yahoo does not provide the Russell 2000 components. Tickers been downloaded from: ",
    crayon::cyan("money.cnn.com/data/markets/russell"),
    "."
  )

  map_paths <- function(paths) {

    insistently_read_html <- purrr::insistently(f = xml2::read_html)

    paths %>%
      insistently_read_html() %>%
      rvest::html_nodes(css = "table") %>%
      rvest::html_table() %>%
      purrr::chuck(4) %>%
      dplyr::select(.data$Company) %>%
      tidyr::separate(.data$Company, into = c("tickers", "company"), extra = "drop") %>%
      dplyr::select(.data$tickers) %>%
      dplyr::as_tibble()

  }

  purrr::map_df(.x = paths, .f = map_paths)

}


#' @rdname utils
get_ftse100 <- function() {

  path <- "https://www.londonstockexchange.com/exchange/prices-and-markets/stocks/indices/summary/summary-indices-constituents.html?index=UKX&page="
  pages <- 1:6
  paths <- stringr::str_c(path, pages)

  message(
    "Yahoo does not provide the FTSE 100 components. Tickers been downloaded from: ",
    crayon::cyan("www.londonstockexchange.com"),
    "."
    )

  map_paths <- function(paths) {

    insistently_read_html <- purrr::insistently(f = xml2::read_html)

    paths %>%
      insistently_read_html() %>%
      rvest::html_nodes(css = "table") %>%
      rvest::html_table() %>%
      data.frame() %>%
      vctrs::vec_as_names_legacy() %>%
      dplyr::as_tibble() %>%
      dplyr::rename(tickers = 1) %>%
      dplyr::select(.data$tickers) %>%
      dplyr::mutate(tickers = stringr::str_c(.data$tickers, ".L") %>%
                      stringr::str_replace(., "\\..L", "\\.L")
                    )

  }

  purrr::map_df(.x = paths, .f = map_paths)

}


#' @rdname utils
get_dax <- function() {

  path <- "https://finance.yahoo.com/quote/%5EGDAXI/components?p=%5EGDAXI"

  insistently_read_html <- purrr::insistently(f = xml2::read_html)

  path %>%
    insistently_read_html() %>%
    rvest::html_nodes(css = "table") %>%
    rvest::html_table() %>%
    data.frame() %>%
    dplyr::as_tibble() %>%
    dplyr::rename(tickers = .data$Symbol) %>%
    dplyr::select(.data$tickers)

}


#' @rdname utils
get_cac40 <- function() {

  # TODO for now get the first 30 components from yahoo website.
  # TODO find a website that offers the list of stocks with the right tickers

  # path <- "http://topforeignstocks.com/indices/components-of-the-cac-40-index/"
  #
  # message(
  #   "Yahoo does not provide the CAC 40 components. Tickers been downloaded from: ",
  #   crayon::cyan("topforeignstocks.com"),
  #   "."
  #   )

  path <- 'https://finance.yahoo.com/quote/%5EFCHI/components?p=%5EFCHI'

  message(
    "Yahoo does not provide the CAC 40 components, but it's website it's beeing used temporarily while no other reliable source with the tickers is found."
  )

  insistently_read_html <- purrr::insistently(f = xml2::read_html)

  # path %>%
  #   insistently_read_html() %>%
  #   rvest::html_nodes(css = "table") %>%
  #   rvest::html_table() %>%
  #   data.frame() %>%
  #   dplyr::as_tibble() %>%
  #   dplyr::rename(tickers = .data$`Ticker..in.Yahoo.Finance.`) %>%
  #   dplyr::select(.data$tickers)

  path %>%
    insistently_read_html() %>%
    rvest::html_node(css = "table") %>%
    rvest::html_table() %>%
    data.frame() %>%
    dplyr::as_tibble() %>%
    dplyr::rename(tickers = .data$Symbol) %>%
    dplyr::select(.data$tickers)

}


#' @rdname utils
get_bel20 <- function() {

  path <- "https://finance.yahoo.com/quote/%5EBFX/components?p=%5EBFX"

  insistently_read_html <- purrr::insistently(f = xml2::read_html)

  path %>%
    insistently_read_html() %>%
    rvest::html_nodes(css = "table") %>%
    rvest::html_table() %>%
    as.data.frame() %>%
    dplyr::as_tibble() %>%
    dplyr::rename(tickers = .data$Symbol) %>%
    dplyr::select(.data$tickers)

}


#' @rdname utils
get_topix <- function() {

  message(
    "Yahoo does not provide the NYSE components. Tickers downloaded from: ",
    crayon::cyan("www.jpx.co.jp/english/markets/indices/topix"),
    "."
    )

  url <- "https://www.jpx.co.jp/english/markets/indices/topix/tvdivq00000030ne-att/TOPIX_weight_en.xlsx"
  destfile <- "TOPIX_weight_en.xlsx"

  insistently_curl_download <- purrr::insistently(f = curl::curl_download)

  insistently_curl_download(url, destfile)

  suppressWarnings(readxl::read_excel(destfile)) %>%
    dplyr::filter(.data$`New Index Series Code` == "TOPIX Core30") %>%
    dplyr::select(.data$Code) %>%
    dplyr::rename(tickers = .data$Code) %>%
    dplyr::mutate(tickers = stringr::str_c(.data$tickers, ".T")) %>%
    dplyr::distinct()

}


#' @rdname utils
get_hangseng <- function() {

  # path <- "http://topforeignstocks.com/indices/components-of-the-hang-seng-index/"
  #
  # message(
  #   "Yahoo does not provide the Hang Seng components. Tickers been downloaded from: ",
  #   crayon::cyan("topforeignstocks.com"),
  #   "."
  #   )
  #
  # insistently_read_html <- purrr::insistently(f = xml2::read_html)
  #
  # path %>%
  #   insistently_read_html() %>%
  #   rvest::html_nodes(css = "table") %>%
  #   rvest::html_table() %>%
  #   data.frame() %>%
  #   dplyr::as_tibble() %>%
  #   dplyr::rename(tickers = .data$Ticker) %>%
  #   dplyr::select(.data$tickers)
  #
  #    path <- 'https://finance.yahoo.com/quote/%5EFCHI/components?p=%5EFCHI'

  message(
    "Yahoo does not provide all the hangseng components, but it's website it's beeing used temporarily while no other reliable source with the tickers is found."
  )

  path <- "https://finance.yahoo.com/quote/%5EHSI/components"

  insistently_read_html <- purrr::insistently(f = xml2::read_html)

  path %>%
    insistently_read_html() %>%
    rvest::html_node(css = "table") %>%
    rvest::html_table() %>%
    data.frame() %>%
    dplyr::as_tibble() %>%
    dplyr::rename(tickers = .data$Symbol) %>%
    dplyr::select(.data$tickers)

}


#' @rdname utils
get_sensex <- function() {

  # path <- "http://topforeignstocks.com/indices/components-of-the-sensex-index"
  #
  # message(
  #   "Yahoo does not provide the Sensex components. Tickers been downloaded from: ",
  #   crayon::cyan("topforeignstocks.com"),
  #   "."
  #   )
  #
  #path <- "https://in.finance.yahoo.com/quote/%5EBSESN/components?p=%5EBSESN"
  path <- "https://en.wikipedia.org/wiki/BSE_SENSEX"

  insistently_read_html <- purrr::insistently(f = xml2::read_html)

  # path %>%
  #   insistently_read_html() %>%
  #   rvest::html_nodes(css = "table") %>%
  #   rvest::html_table() %>%
  #   data.frame() %>%
  #   dplyr::as_tibble() %>%
  #   dplyr::rename(tickers = .data$Scrip.Code) %>%
  #   dplyr::select(.data$tickers) %>%
  #   dplyr::mutate(tickers = stringr::str_c(.data$tickers, ".BO"))

  path %>%
    insistently_read_html() %>%
    rvest::html_node(css = "table.wikitable") %>%
    rvest::html_table() %>%
    data.frame() %>%
    dplyr::as_tibble() %>%
    dplyr::rename(tickers = .data$Symbol) %>%
    dplyr::select(.data$tickers)


}


#' @rdname utils
get_jakarta <- function() {

  path <- "http://topforeignstocks.com/indices/the-components-of-the-jakarta-composite-index-index"

  message(
    "Yahoo does not provide the Jakarta Index components. Tickers been downloaded from: ",
    crayon::cyan("topforeignstocks.com"),
    "."
  )

  insistently_read_html <- purrr::insistently(f = xml2::read_html)

  path %>%
    insistently_read_html() %>%
    rvest::html_nodes(css = "table") %>%
    rvest::html_table() %>%
    data.frame() %>%
    dplyr::as_tibble() %>%
    dplyr::rename(tickers = .data$Code) %>%
    dplyr::select(.data$tickers) %>%
    dplyr::mutate(tickers = stringr::str_c(.data$tickers, ".JK"))

}


#' @rdname utils
get_bursa <- function() {

  # path <- "http://topforeignstocks.com/indices/the-components-of-the-ftse-bursa-malaysia-klci-index"
  #
  # message(
  #   "Yahoo does not provide the FTSE Bursa Index components. Tickers been downloaded from: ",
  #   crayon::cyan("topforeignstocks.com"),
  #   "."
  #   )
  #
  # insistently_read_html <- purrr::insistently(f = xml2::read_html)
  #
  # path %>%
  #   insistently_read_html() %>%
  #   rvest::html_nodes(css = "table") %>%
  #   rvest::html_table() %>%
  #   data.frame() %>%
  #   dplyr::as_tibble() %>%
  #   dplyr::rename(tickers = .data$Ticker) %>%
  #   stats::na.omit() %>%
  #   dplyr::select(.data$tickers)

  path <- "https://finance.yahoo.com/quote/%5EKLSE/components?p=%5EKLSE"

  insistently_read_html <- purrr::insistently(f = xml2::read_html)

  path %>%
    insistently_read_html() %>%
    rvest::html_node(css = "table") %>%
    rvest::html_table() %>%
    data.frame() %>%
    dplyr::as_tibble() %>%
    dplyr::rename(tickers = .data$Symbol) %>%
    dplyr::select(.data$tickers)

}


#' @rdname utils
get_nzx50 <- function() {

  # path <- "http://topforeignstocks.com/indices/components-of-the-nzsx-50-index"
  #
  # message(
  #   "Yahoo does not provide the NZX 50 Index components. Tickers been downloaded from: ",
  #   crayon::cyan("topforeignstocks.com"),
  #   "."
  # )
  #
  # insistently_read_html <- purrr::insistently(f = xml2::read_html)
  #
  # path %>%
  #   insistently_read_html() %>%
  #   rvest::html_nodes(css = "table") %>%
  #   rvest::html_table() %>%
  #   purrr::chuck(1) %>%
  #   data.frame() %>%
  #   dplyr::as_tibble() %>%
  #   dplyr::rename(tickers = .data$Ticker) %>%
  #   dplyr::select(.data$tickers) #%>%
    #dplyr::mutate(tickers = stringr::str_c(.data$tickers, ".NZ"))

  path <- "https://en.wikipedia.org/wiki/S%26P/NZX_50_Index"

  insistently_read_html <- purrr::insistently(f = xml2::read_html)

  path %>%
    insistently_read_html() %>%
    rvest::html_node(css = "table.wikitable") %>%
    rvest::html_table() %>%
    data.frame() %>%
    dplyr::as_tibble() %>%
    dplyr::rename(tickers = .data$Ticker.symbol) %>%
    dplyr::select(.data$tickers)

}


#' @rdname utils
get_kospi <- function() {

  path <- "http://topforeignstocks.com/indices/the-components-of-the-korea-stock-exchange-kospi-index"

  message(
    "Yahoo does not provide the KOSPI Index components. Tickers been downloaded from: ",
    crayon::cyan("topforeignstocks.com"),
    "."
  )

  insistently_read_html <- purrr::insistently(f = xml2::read_html)

  path %>%
    insistently_read_html() %>%
    rvest::html_nodes(css = "table") %>%
    rvest::html_table() %>%
    data.frame() %>%
    dplyr::as_tibble() %>%
    dplyr::rename(tickers = .data$Ticker) %>%
    dplyr::select(.data$tickers) %>%
    dplyr::mutate(tickers = stringr::str_c(.data$tickers, ".KS"))

}


#' @rdname utils
get_taiex  <- function() {

  path <- "http://topforeignstocks.com/indices/the-components-of-the-taiwan-stock-exchange-weighted-index"

  message(
    "Yahoo does not provide the TAIEX Index components. Tickers been downloaded from: ",
    crayon::cyan("topforeignstocks.com"),
    "."
    )

  insistently_read_html <- purrr::insistently(f = xml2::read_html)

  path %>%
    insistently_read_html() %>%
    rvest::html_nodes(css = "table") %>%
    rvest::html_table() %>%
    data.frame() %>%
    dplyr::as_tibble() %>%
    dplyr::rename(tickers = .data$Ticker) %>%
    dplyr::select(.data$tickers) %>%
    dplyr::mutate(tickers = stringr::str_c(.data$tickers, ".TW"))

}


#' @rdname utils
get_tsx  <- function() {

  path <- "http://topforeignstocks.com/indices/the-components-of-the-sptsx-composite-index"

  message(
    "Yahoo does not provide the TSX Index components. Tickers been downloaded from: ",
    crayon::cyan("topforeignstocks.com"),
    "."
    )

  insistently_read_html <- purrr::insistently(f = xml2::read_html)

  path %>%
    insistently_read_html() %>%
    rvest::html_nodes(css = "table") %>%
    rvest::html_table() %>%
    data.frame() %>%
    dplyr::as_tibble() %>%
    dplyr::rename(tickers = .data$Ticker) %>%
    dplyr::select(.data$tickers)

}


#' @rdname utils
get_ibov  <- function() {

  path <- "http://topforeignstocks.com/indices/components-of-the-bovespa-index"

  message(
    "Yahoo does not provide the Ibovespa Index components. Tickers been downloaded from: ",
    crayon::cyan("topforeignstocks.com"),
    "."
  )

  insistently_read_html <- purrr::insistently(f = xml2::read_html)

  path %>%
    insistently_read_html() %>%
    rvest::html_nodes(css = "table") %>%
    rvest::html_table() %>%
    data.frame() %>%
    dplyr::as_tibble() %>%
    dplyr::rename(tickers = .data$Ticker) %>%
    dplyr::select(.data$tickers) %>%
    dplyr::mutate(tickers = stringr::str_c(.data$tickers, ".SA"))

}


#' @rdname utils
get_ipc  <- function() {

  path <- "https://finance.yahoo.com/quote/%5EMXX/components?p=%5EMXX"

  #insistently_curl_download <- purrr::insistently(f = curl::curl_download)

  # insistently_curl_download(url, destfile)
  #
  # ipc_tbl <- suppressWarnings(
  #
  #   readxl::read_excel(destfile, skip = 9) %>%
  #   dplyr::mutate(Symbol = stringr::str_remove(.data$Symbol, " \\*")) %>%
  #   tidyr::separate(.data$Symbol, into = c("part1", "part2"), sep = " ", extra = "drop")
  #
  #   )
  #
  # ipc_tbl %>%
  #   dplyr::mutate(part2 = dplyr::if_else(
  #     condition = .data$part2 %in% NA,
  #     true = "",
  #     false = .data$part2
  #     )
  #   ) %>%
  #   tidyr::unite(col = "tickers", .data$part1, .data$part2, sep = "") %>%
  #   dplyr::select(.data$tickers) %>%
  #   dplyr::filter(.data$tickers != "NA") %>%
  #   dplyr::mutate(tickers = stringr::str_c(.data$tickers, ".MX"))

  insistently_read_html <- purrr::insistently(f = xml2::read_html)

  path %>%
    insistently_read_html() %>%
    rvest::html_node(css = "table") %>%
    rvest::html_table() %>%
    data.frame() %>%
    dplyr::as_tibble() %>%
    dplyr::rename(tickers = .data$Symbol) %>%
    dplyr::select(.data$tickers)

}


#' @rdname utils
get_ipsa <- function() {

  path <- "https://finance.yahoo.com/quote/%5EIPSA/components?p=%5EIPSA"

  insistently_read_html <- purrr::insistently(f = xml2::read_html)

  path %>%
    insistently_read_html() %>%
    rvest::html_nodes(css = "table") %>%
    rvest::html_table() %>%
    data.frame() %>%
    dplyr::as_tibble() %>%
    dplyr::rename(tickers = .data$Symbol) %>%
    dplyr::select(.data$tickers)

}


#' @rdname utils
get_merval <- function() {

  path <- "https://finance.yahoo.com/quote/%5EMERV/components?p=%5EMERV"

  insistently_read_html <- purrr::insistently(f = xml2::read_html)

  path %>%
    insistently_read_html() %>%
    rvest::html_nodes(css = "table") %>%
    rvest::html_table() %>%
    data.frame() %>%
    dplyr::as_tibble() %>%
    dplyr::rename(tickers = .data$Symbol) %>%
    dplyr::select(.data$tickers)

}


# Other Auxiliary Functions -----------------------------------------------

#' @rdname utils
#' @param .fun A valid function from the forecast package.
validate_funs <- function(.fun) {

  fun_name <- lazyeval::expr_text(.fun)

  acceptable_funs <- c("arfima", "Arima", "auto.arima", "ets", "baggedModel",
                       "baggedETS", "bats", "tbats", "nnetar", "tslm")

  if (!(fun_name %in% acceptable_funs)) {

    message("YahooTickers currently only support functions from the forecast package.",
            "\n",
            "Please, change the argument ", crayon::cyan(".fun"), " to one of the options bellow:"
            )

    purrr::iwalk(
      .x = acceptable_funs,
      .f = ~ cat(.y, ":", .x, "\n")
      )

    stop()

  }

}
Reckziegel/YahooTickers documentation built on Nov. 29, 2021, 9:45 p.m.