#'
#' In this file we're concerned with testing the accuracy of our functions for retrieving
#' price data, as well as to make sure that the raw data is correctly mapped onto
#' the processed, tidy data set.
#'
#' RAW DATA TESTS:
#' - Every ticker in the raw price data is unique with a predicted number of columns.
#' - Every ticker for which we successfully grabbed data, has some data
#' - Missing Companies is Solely Due To Quantmod Finding No Data
#'
#' PROCESSED DATA TESTs:
#' - Raw data matches processed data for any given company
#'
context("Gathering Raw Price Data Tests")
companies <- qmjdata::companies[1:3,]
raw_prices <- tryCatch(qmj::get_prices(companies), error = function(e) e)
test_that("Every ticker in the raw price data is unique with a predicted number of columns", {
## If error is returned, no internet connection.
if(inherits(raw_prices, "error"))
testthat::skip()
## Grab column names and remove everything but the ticker.
tickers <- colnames(raw_prices)
tickers <- gsub('\\.([^.]*$)(.*)', '', tickers) # Remove everything after the last '.' to get rid of '.____' suffixes.
tickers <- tickers[tickers != 'GSPC' & tickers != 'pret'] # We're not interested in either the GSPC or pret columns.
occurrences <- table(tickers)
## After removing pret, every ticker should only appear 5 times.
apply(occurrences, MARGIN=1, FUN=function(ticker_occurrences){expect_equal(ticker_occurrences, 5)})
## We use length(table) as we presume that price data for some tickers were not found.
num_columns_used_by_GSPC <- 7 # GSPC.Open, GSPC.High, GSPC.Low, GSPC.Close, GSPC.Volume, GSPC.Adjusted, pret
num_of_columns_per_ticker <- 6 # TICK.Open, TICK.High, TICK.Low, TICK.Close, TICK.Volume, pret.num
## Make sure we covered all columns in the raw price data.
expected_num_columns = (num_columns_used_by_GSPC) + (num_of_columns_per_ticker * length(occurrences))
expect_equal(expected_num_columns, ncol(raw_prices))
})
test_that("Every ticker for which we successfully grabbed data, has some data", {
## If error is returned, no internet connection.
if(inherits(raw_prices, "error"))
testthat::skip()
#' @describeIn Check to make sure every column has at least one non-na entry
col_check <- function(column) {
expect_true(sum(!is.na(column)) >= 1, label="grabbed data contains some data")
}
## For completeness, we check all columns to ensure they have at least one non-NA entry.
apply(raw_prices, MARGIN=2, FUN=col_check)
})
test_that("Missing Companies is Solely Due To Quantmod Finding No Data", {
## If error is returned, no internet connection.
if(inherits(raw_prices, "error"))
testthat::skip()
## First get all companies for which we do have data.
tickers <- colnames(raw_prices)
tickers <- gsub('\\.([^.]*$)(.*)', '', tickers) # Remove everything after the last '.' to get rid of '.____' suffixes.
tickers <- tickers[tickers != 'GSPC' & tickers != 'pret'] # We're not interested in either the GSPC or pret columns.
tickers <- unique(tickers)
## Then determine which companies have no price data
missing_tickers <- companies$ticker[!companies$ticker %in% tickers]
## We only want stock data from the past 2 years
start <- as.POSIXlt(Sys.Date())
start$year <- start$year - 2
start <- as.Date(start)
#' @describeIn For each missing ticker, ensure that quantmod produces either an error or an empty
#' xts object.
download_check <- function(ticker){
price_data <- tryCatch(
quantmod::getSymbols(companyTicker, src="google", auto.assign=FALSE, from=start, warnings=FALSE,
messages=FALSE),
error = function(e) e
)
## Check to make sure that either price_data is an error, or that it has a single
## row of NA's.
expect_equal(inherits(price_data, "error") || (length(price_data[,1])==1 && sum(!is.na(price_data))==0), TRUE,
label=paste0(ticker, " price information missing due to download error or lack of data"))
}
lapply(missing_tickers, FUN=download_check)
})
#################################################
##
## Tests related to data processing.
##
#################################################
context("Processing/Tidying Price Data Tests")
test_that("Raw data matches processed data for any given company", {
## If error is returned, no internet connection.
if(inherits(raw_prices, "error"))
testthat::skip()
## Grab all .Close and pret columns from the raw data.
close_indices <- grep('(.Close)', colnames(raw_prices))
pret_indices <- grep('(pret)', colnames(raw_prices))
subset_close <- raw_prices[, close_indices]
subset_pret <- raw_prices[,pret_indices]
column_names <- colnames(subset_close)
## A quick sanity check. Make sure subset_close and subset_pret have
## equal numbers of columns.
expect_equal(ncol(subset_close), ncol(subset_pret), label='col numbers of close')
## Get the tidied prices for use in the compare_data function.
prices <- tidy_prices(raw_prices)
#' @describeIn Compares raw close and raw pret with their
#' processed counterparts.
compare_row <- function(close, pret, tidyclose, tidypret) {
expect_equal(close, tidyclose, label='closing values match')
expect_equal(pret, tidypret, label='pret values match')
}
#' @describeIn Accept raw columns containing closing prices and pret,
#' find the relevant chunk of processed data, and compare the values.
compare_transcription <- function(colname, close, pret) {
# Grab the ticker by removing everything past, and including, the last '.'
ticker <- gsub('\\.([^.]*$)(.*)', '', colname)
message(paste("Comparing raw and tidied prices for", ticker, sep=' '))
price_subset <- prices[prices$ticker==ticker,]
## Compare values with their counterparts row-by-row.
mapply(compare_row, close, pret, price_subset$close, price_subset$pret)
}
## Now, iterate through both subsetted columns and compare data with the
## processed prices data. subset_close and subset_pret have equal col
## numbers from an earlier check.
for(i in 1:ncol(subset_close)) {
compare_transcription(column_names[i], subset_close[,i], subset_pret[,i])
}
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.