#' Generate a Yahoo! Finance URL
#'
#' @param code numeric. Specify a brand code.
#' @param start_date numeric.
#' Specify the start date of the stock price data you want to acquire
#' in the format of yyyymmdd.
#' @param end_date numeric.
#' Specify the end date of the stock price data you want to acquire
#' in the format of yyyymmdd.
#' @param datatype character. Specify the type of stock price data.
#' If `datatype = "d"`, get daily data.
#' If `datatype = "w"`, get weekly data.
#' If `datatype = "m"`, get monthly data.
#' @param page numeric.
#' Specify the page number of the stock price data you want to acquire.
#'
create_url <- function(code, start_date, end_date, datatype, page) {
  paste0(
    "https://finance.yahoo.co.jp/quote/",
    code,
    ".T/history?from=",
    start_date,
    "&to=",
    end_date,
    "&timeFrame=",
    datatype,
    "&page=",
    page
  )
}
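# Illustrative sketch (not run): using the Sony Group arguments from the
# `scrape_onefirm` examples below (brand code 6758, daily data for 2014),
# the helper simply concatenates its inputs into the history-page URL:
#
#   create_url(6758, 20140101, 20141231, "d", page = 1)
#   #> "https://finance.yahoo.co.jp/quote/6758.T/history?from=20140101&to=20141231&timeFrame=d&page=1"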
#' Calculate the Number of Pages of Stock Price Data for a Certain Brand
#'
#' @param code numeric. Specify a brand code.
#' @param start_date numeric.
#' Specify the start date of the stock price data you want to acquire
#' in the format of yyyymmdd.
#' @param end_date numeric.
#' Specify the end date of the stock price data you want to acquire
#' in the format of yyyymmdd.
#' @param datatype character. Specify the type of stock price data.
#' If `datatype = "d"`, get daily data.
#' If `datatype = "w"`, get weekly data.
#' If `datatype = "m"`, get monthly data.
#'
#' @importFrom rvest read_html
#' @importFrom rvest html_elements
#' @importFrom rvest html_text2
#' @importFrom magrittr %>%
page_count <- function(code, start_date, end_date, datatype) {
  # Generate the URL of the first page of stock price data
  find <- create_url(code, start_date, end_date, datatype, page = 1)
  # Scrape the number of observations of stock price data
  nrecode <- read_html(find) %>%
    html_elements(
      xpath = paste0(
        '//*[@id="root"]/main/div/div/div[1]/',
        "div[2]/section[2]/div/div[4]/p/text()[1]"
      )
    ) %>%
    html_text2() %>%
    as.numeric()
  # Calculate the required number of pages,
  # using the fact that there are 20 observations of stock price data per page.
  # Integer division (#observations %/% 20) gives the minimum number of pages;
  # if the remainder (#observations %% 20) is non-zero, one more page is needed.
  if (nrecode %% 20 != 0) {
    nrecode %/% 20 + 1
  } else {
    nrecode %/% 20
  }
}
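# Worked example of the page arithmetic above (not run; `n_pages` is a
# hypothetical helper introduced only for illustration): with 45 records,
# 45 %/% 20 == 2 and 45 %% 20 == 5 != 0, so 3 pages are needed;
# with exactly 40 records, 40 %% 20 == 0, so 40 %/% 20 == 2 pages suffice.
#
#   n_pages <- function(n) if (n %% 20 != 0) n %/% 20 + 1 else n %/% 20
#   n_pages(45)  # 3
#   n_pages(40)  # 2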
#' Web Scraping of Stock Price Data
#'
#' @description Scrape the stock price data on a specific page
#' for a certain brand from Yahoo! Finance.
#' This function is the backbone of this package;
#' the other functions are wrappers around it.
#'
#' @param code numeric. Specify a brand code.
#' @param start_date numeric.
#' Specify the start date of the stock price data you want to acquire
#' in the format of yyyymmdd.
#' @param end_date numeric.
#' Specify the end date of the stock price data you want to acquire
#' in the format of yyyymmdd.
#' @param datatype character. Specify the type of stock price data.
#' If `datatype = "d"`, get daily data.
#' If `datatype = "w"`, get weekly data.
#' If `datatype = "m"`, get monthly data.
#' @param page numeric.
#' Specify the page number of the stock price data you want to acquire.
#' @return A tibble data containing stock price data
#'
#' @importFrom rvest read_html
#' @importFrom rvest html_element
#' @importFrom rvest html_table
#' @importFrom magrittr %>%
#' @export
#'
scrape_onepage <- function(code, start_date, end_date, datatype, page) {
  # Generate the URL of the stock price data on a specific page for a brand
  find <- create_url(code, start_date, end_date, datatype, page)
  # Scrape the page and build a data frame containing the stock price table
  tab <- read_html(find) %>%
    html_element(
      xpath = '//*[@id="root"]/main/div/div/div[1]/div[2]/section[2]/div/table'
    ) %>%
    html_table()
  # Rename columns
  colnames(tab) <- c(
    "date",
    "open_price",
    "high_price",
    "low_price",
    "close_price",
    "volume",
    "split_up_adjust_close_price"
  )
  # Convert character columns (with thousands separators) to numeric
  tab$open_price <- as.numeric(gsub(",", "", tab$open_price))
  tab$high_price <- as.numeric(gsub(",", "", tab$high_price))
  tab$low_price <- as.numeric(gsub(",", "", tab$low_price))
  tab$close_price <- as.numeric(gsub(",", "", tab$close_price))
  tab$volume <- as.numeric(gsub(",", "", tab$volume))
  tab$split_up_adjust_close_price <- as.numeric(
    gsub(",", "", tab$split_up_adjust_close_price)
  )
  # Output
  tab
}
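# Hedged usage sketch (not run; requires network access and a live page layout
# matching the XPath above): fetch the first page of Sony Group's 2014 daily
# data, returning a tibble of at most 20 rows with the renamed numeric columns:
#
#   scrape_onepage(6758, 20140101, 20141231, "d", page = 1)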
#' Web Scraping of Stock Price Data for One Brand
#'
#' @description Calculate the number of pages required
#' to get all stock price data for one brand,
#' and run `scrape_onepage` on each page in a loop.
#' To avoid putting load on the server,
#' the function waits one second after scraping one page
#' before scraping the next.
#' The stock price data from each page are combined row-wise
#' with `bind_rows` from the {dplyr} package.
#'
#' @param code numeric. Specify a brand code.
#' @param name character. Specify a company name.
#' If specified, the `code` argument is ignored and
#' the brand code is looked up with `code_detect`.
#' @param start_date numeric.
#' Specify the start date of the stock price data you want to acquire
#' in the format of yyyymmdd.
#' @param end_date numeric.
#' Specify the end date of the stock price data you want to acquire
#' in the format of yyyymmdd.
#' @param datatype character. Specify the type of stock price data.
#' If `datatype = "d"`, get daily data.
#' If `datatype = "w"`, get weekly data.
#' If `datatype = "m"`, get monthly data.
#' @return A tibble data containing stock price data
#'
#' @importFrom dplyr bind_rows
#' @export
#' @examples
#' # Acquire 2014 daily stock price data of Sony Group (brand code 6758)
#' scrape_onefirm(
#' 6758,
#' start_date = 20140101,
#' end_date = 20141231,
#' datatype = "d"
#' )
#'
#' # Acquire 2014 monthly stock price data of APAMAN
#' scrape_onefirm(
#' name = "APAMAN",
#' start_date = 20140101,
#' end_date = 20141231,
#' datatype = "m"
#' )
#'
scrape_onefirm <- function(code, name = NULL, start_date, end_date, datatype) {
  # Find the brand code if a company name is specified
  if (!is.null(name)) {
    if (length(name) > 1) stop("Only 1 firm name can be passed.")
    find <- code_detect(name)
    code <- find$code
    if (length(code) > 1) {
      stop("The number of matched brand codes is more than one.")
    }
  }
  # Calculate the required number of pages
  maxpg <- page_count(code, start_date, end_date, datatype)
  # Initialise an empty accumulator
  dt <- NULL
  # Scrape each page and accumulate the results in an object called dt.
  # The request for page i + 1 is sent one second
  # after the processing of page i is completed.
  for (i in seq_len(maxpg)) {
    newdt <- scrape_onepage(code, start_date, end_date, datatype, i)
    dt <- bind_rows(dt, newdt)
    Sys.sleep(1)
  }
  # Convert character dates to Date objects.
  # The dates are Japanese strings; split on the kanji for
  # year (\u5e74), month (\u6708) and day (\u65e5), then zero-pad month and day.
  chrlist <- strsplit(dt$date, "(\u5e74|\u6708|\u65e5)")
  year <- sapply(chrlist, FUN = function(x) x[1])
  month <- sapply(chrlist, FUN = function(x) x[2])
  month <- formatC(as.numeric(month), width = 2, flag = "0")
  day <- sapply(chrlist, FUN = function(x) x[3])
  day <- formatC(as.numeric(day), width = 2, flag = "0")
  date <- paste(year, month, day, sep = "-")
  dt$date <- as.Date(date)
  # Add brand code
  dt$brand <- code
  # Output
  dt
}
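# Illustrative sketch of the date conversion above (not run; the example date
# string is assumed, not taken from a live page): a date such as
# "2014\u5e741\u67086\u65e5" splits into c("2014", "1", "6"); zero-padding the
# month and day then gives an ISO-style string that as.Date() understands:
#
#   strsplit("2014\u5e741\u67086\u65e5", "(\u5e74|\u6708|\u65e5)")[[1]]
#   #> "2014" "1" "6"
#   as.Date(paste("2014", formatC(1, width = 2, flag = "0"),
#                 formatC(6, width = 2, flag = "0"), sep = "-"))
#   #> "2014-01-06"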