#' @import data.table
NULL
#' Unsplit the Close from an xts
#'
#' \code{remove_autosplit} allows \code{quantmod::getSymbols.yahoo} to
#' return closes that are not split adjusted. There is no change
#' in \code{quantmod}, but Yahoo has changed what they return.
#'
#' @param prices An xts with a Close as in quantmod.
#' @param splits An xts with the splits for the prices.
#'
#' @return The xts with the splits undone.
#'
#' @export
remove_autosplit <- function(prices, splits) {
all_dates <- unique(sort(c(zoo::index(prices), zoo::index(splits))))
n_dates <- length(all_dates)
shares <- xts::xts(rep(1, n_dates), order.by=all_dates)
shares[zoo::index(splits),] <- 1/splits
shares <- stats::lag(shares, k=-1)
shares[n_dates,] <- 1 # lag introduces NA at most recent date
cumshares <- xts::xts( rev( cumprod( rev( drop( zoo::coredata(shares) ) ) ) ),
order.by = zoo::index(shares))
close_col <- grep("close", colnames(prices), ignore.case=TRUE)
unsplit_prices <- prices
unsplit_prices[,close_col] <- prices[,close_col] * cumshares[zoo::index(prices),]
unsplit_prices
}
clean_duplicate_dates <- function(x) {
`%>%` <- magrittr::`%>%`
dates <- x %>% as.data.frame %>% row.names %>% as.Date
with_dates <- x %>% as.data.frame %>% dplyr::mutate(Date=dates)
Date <- n <- NULL
date_count <- with_dates %>% dplyr::count(Date)
duplicates <- (date_count %>% dplyr::pull(n)) > 1
if( any(duplicates) ) {
warning(paste0("Multiple rows for dates ", date_count %>% dplyr::filter(duplicates)))
}
y <- with_dates %>%
dplyr::group_by(Date) %>%
dplyr::slice(dplyr::n()) %>%
dplyr::ungroup()
xts::as.xts(dplyr::select(y, -Date), order.by=y[["Date"]])
}
needs_symbol <- function(x) any( grepl(".", names(x), fixed=TRUE) )
#' Retrieve stock prices as data.table
#'
#' \code{getDTSymbols} adapts \code{quantmod::getSymbols} to convert
#' results to \code{data.table}.
#'
#' @param x A symbol or list of symbols to retrieve from Yahoo and adjust.
#' @param ... Additional arguments passed to quantmod::getSymbols.
#' @param cache Whether to check cache for data before calling quantmod::getSymbols.
#'
#' @return A \code{data.table} of data for input symbols with adjustments.
#'
#' @export
getDTSymbols <- function(x, ..., cache=TRUE) {
argg <- list(...)
get_arg <- function(var) {
expr <- substitute(hasArg(y), list(y=var))
if( eval(expr, parent.frame()) ) {
value <- argg[[var]]
} else {
value <- formals(quantmod::getSymbols.yahoo)[[var]] # TODO move away from yahoo default
}
eval(value)
}
start_date <- get_arg("from")
end_date <- get_arg("to")
index <- NULL
results <- list()
for( symbol in x ) {
cache_file <- get_cache_file(symbol, start_date)
found_end_date <- as.Date("1000-01-01")
cache_exists <- file.exists(cache_file)
if( cache_exists ) {
data <- load_cache(cache_file)
found_end_date <- data[, max(index)] + 1 # yahoo end date is non-inclusive
}
if( found_end_date < end_date ) {
getSymbols <- quantmod::getSymbols # getSymbols doesn't expect to see the
# package name when it retrieves its defaults (gives a warning).
# Do this rather than importing getSymbols.
price <- getSymbols(symbol, ...)
splits <- quantmod::getSplits(symbol, ...)
price <- remove_autosplit(price, splits) # Yahoo has started returning split-adjusted closes
price <- clean_duplicate_dates(price)
dividends <- quantmod::getDividends(symbol, ...)
raw <- make_raw_value(price, splits, dividends)
new_data <- gather_symbol( as.data.table(raw) )
setkey(new_data, symbol, index)
if( nrow(new_data) > 0 ) {
if( cache && cache_exists )
check_update(data, new_data)
if( cache )
save_cache(new_data, cache_file)
data <- new_data
found_end_date <- data[, max(index)] + 1 # yahoo end date is non-inclusive
}
if( found_end_date < end_date ) {
warning(symbol, " data stops at ", found_end_date)
}
}
results[[symbol]] <- data
}
rbindlist(results)
}
#' Compare whether two data sets agree on overlap.
#'
#' \code{check_update} compares the loaded data for two data sets to
#' make sure that the second is an update of the first. Throws an error
#' if the is something nontrivial in the update.
#'
#' @param old Older, presumably smaller set of data.
#' @param new Newer, presumably bigger set of data.
#'
#' @return TRUE if the sets agree on the overlap.
check_update <- function(old, new) {
Close <- i.Close <- i.split <- rawvalue <- i.rawvalue <- rawshares <-
retroactive_shares <- dividend <- i.dividend <- NULL
DIVIDEND_PRECISION <- 1e-12
stopifnot( key(old) == key(new))
overlap <- new[old, on=key(old)]
# cols_to_match <- c("Close", "split")
mismatch <- overlap[, which(Close != i.Close | split != i.split | rawvalue != i.rawvalue)]
# Now check dividend, which may be adjusted differently if there
# are splits in new that are not in old
# rawshares <- overlap[, 1/cumprod(split)]
dividends <- new[, c(key(new), "dividend"), with=FALSE]
dividends[, rawshares := new[, 1/cumprod(split)] ]
dividends[, retroactive_shares := last(rawshares) / rawshares]
raw_dividends <- dividends[old, retroactive_shares*dividend, on=key(old)]
old_rawshares <- overlap[, 1/cumprod(i.split)]
old_retroactive_shares <- last(old_rawshares) / old_rawshares
readjusted_dividends <- raw_dividends / old_retroactive_shares
dividend_mismatch <- which( abs( readjusted_dividends - overlap[, i.dividend]) > DIVIDEND_PRECISION)
last_old_index <- nrow(overlap)
# Exclude mismatches if the last line of the cache didn't get the dividend at the time of creation
if( last_old_index %in% dividend_mismatch && # dividend mismatch
last_old_index %in% mismatch ) { # rawvalue mismatch
last_old <- overlap[last_old_index,]
if( last_old[,i.dividend==0 && abs(rawvalue - rawshares * dividend - i.rawvalue) < DIVIDEND_PRECISION ] ) {
key_str <- vapply(key(old), function(key_col) {
as.character(last_old[[ key_col]])
}, "") # want to get date string formatted
warning_message <- paste0("Cache was missing dividend on ",
paste(key_str, collapse=", "),
". Raw value increased from ",
last_old[,i.rawvalue],
" to ",
last_old[,rawvalue],
".")
warning(warning_message)
mismatch <- setdiff(mismatch, last_old_index)
dividend_mismatch <- setdiff(dividend_mismatch, last_old_index)
}
}
total_mismatch <- sort( c( mismatch, dividend_mismatch) )
if( length(total_mismatch) > 0 ) {
first_mismatch <- overlap[total_mismatch[1], key(old), with=FALSE]
problems <- rbind( old[first_mismatch][, version := "cached"], new[first_mismatch][, version := "new"])
problems_str <- paste0("\n", paste(utils::capture.output(print(problems)), collapse="\n"))
stop("New data is not an update of old data. Check cache via load_cache vs. getDTSymbols(...,cache=FALSE): ",
problems_str)
}
TRUE
}
#' Turn splits into evolution of single share.
#'
#' @param splits A numeric vector of splits. Most values will be 1. A 2:1 split
#' is represented as 0.5.
#' @return A numeric vector of shares held if splits are applied to an
#' initial position of 1 share.
#'
#' @export
make_shares <- function(splits) {
cumprod(1/splits)
}
#' Turn split adjusted dividends into unadjusted dividends.
#'
#' @param split_adjusted_dividend Split adjusted dividends for a stock.
#' @param splits Stock split data.
#' @param ... Additional arguments.
#' @export
unadjust <- function(split_adjusted_dividend, splits, ...) UseMethod('unadjust')
#' Turn split adjusted dividends into unadjusted dividends.
#'
#' Assumes that unadjusted dividends are only accurate to 3 decimal places
#' unless \code{max_decimals} specifies otherwise.
#'
#' @param split_adjusted_dividend A numeric vector of split adjusted dividends.
#' @param splits A numeric vector of splits.
#' @param ... Additional arguments.
#' @param max_decimals The number of decimal places that unadjusted dividends may have.
#' @return A numeric vector of unadjusted dividends.
#'
#' @export
unadjust.default <- function(split_adjusted_dividend, splits,..., max_decimals = 3) {
n <- length(splits)
stopifnot( n == length(split_adjusted_dividend) )
shares <- make_shares(splits)
round(split_adjusted_dividend * shares[n] / shares, digits = max_decimals)
}
#' Turn split adjusted dividends into unadjusted dividends.
#'
#' Assumes that unadjusted dividends are only accurate to 3 decimal places
#' unless \code{max_decimals} specifies otherwise.
#'
#' @param split_adjusted_dividend A \code{data.table} with a \code{dividend} column and
#' an \code{index} column to merge on.
#' @param splits A \code{data.table} with columns \code{splits} and \code{index}.
#' @param ... Additional arguments passed to \code{unadjust.default}, for example, \code{max_decimals}.
#' @return A \code{data.table} of merged splits and dividends with new fields \code{shares}
#' and \code{unadjusted_dividend}. The merge matches on the \code{index} field. Missing
# splits are filled in with 1. Missing dividends are filled in with 0.
#'
#' @export
unadjust.data.table <- function(split_adjusted_dividend, splits, ...) {
stopifnot( is.data.table(splits),
all( c('index', 'dividend') %in% names(split_adjusted_dividend) ),
all( c('index', 'splits') %in% names(splits) ) )
index <- dividend <- shares <- unadjusted_dividend <- NULL
just_splits <- all( names(splits) %in% c('index', 'splits') )
just_dividends <- all( names(split_adjusted_dividend) %in% c('index', 'dividend') )
div_not_subset_splits <- ! all( split_adjusted_dividend[, index] %in% splits[, index] )
splits_not_subset_div <- ! all( splits[,index] %in% split_adjusted_dividend[, index] )
if( splits_not_subset_div && ! just_dividends ) {
stop("unadjust can't fill non-dividend columns when merging with splits")
} else if( div_not_subset_splits && ! just_splits ) {
stop("unadjust can't fill non-split columns when merging with dividends")
} else { #if( div_not_subset_splits || splits_not_subset_div ) {
merged_data <- merge(split_adjusted_dividend, splits, by = 'index', all = TRUE)
merged_data[ is.na(splits), splits := 1]
merged_data[ is.na(dividend), dividend := 0]
}
# else { # splits and dividends must have the same index
# merged_data
# }
merged_data[, shares := make_shares(splits)]
merged_data[, unadjusted_dividend := unadjust(dividend, splits, ...)]
}
#' Turn split adjusted dividends into unadjusted dividends.
#'
#' Assumes that unadjusted dividends are only accurate to 3 decimal places
#' unless \code{max_decimals} specifies otherwise.
#'
#' @param split_adjusted_dividend An \code{xts} of dividends, either 1 column or with a column name
#' matching dividend.
#' @param splits An \code{xts} of splits, either 1 column or with a column \code{spl}.
#' @param ... Additional arguments passed to unadjust.default, for example, \code{max_decimals}.
#' @return An \code{xts} of merged splits and dividends with new fields \code{shares}
#' and \code{unadjusted_dividend}. Missing splits are filled in with 1. Missing dividends are filled in with 0.
#'
#' @export
unadjust.xts <- function(split_adjusted_dividend, splits, ...) {
if( all(is.na(splits)) ) { #NA to support quantmod::getSplits
splits <- xts::xts( numeric(), as.Date( as.character(), tz = 'UTC'))
}
stopifnot( xts::is.xts(splits),
ncol(split_adjusted_dividend) ==1 ||
any(grepl("dividend", colnames(split_adjusted_dividend),
ignore.case = TRUE)),
ncol(splits) == 1 ||
any( grepl("spl", colnames(splits), ignore.case = TRUE ) ) )
if( ncol(splits) == 1 ) {
split_col <- colnames(splits)
} else {
split_col <- colnames(splits)[ grepl("spl", colnames(splits), ignore.case = TRUE)]
}
if( ncol(split_adjusted_dividend) == 1) {
dividend_col <- colnames(split_adjusted_dividend)
} else {
dividend_col <- colnames(split_adjusted_dividend)[ grepl("dividend",
colnames(split_adjusted_dividend),
ignore.case = TRUE)]
}
if( is.null(dividend_col) ) { #the case for quantmod::getDividends
colnames(split_adjusted_dividend) <- "dividend"
dividend_col <- "dividend"
}
if( is.null(split_col) ) { #not a real issue, but to fully support 1 column split xts
colnames(splits) <- "splits"
split_col <- "splits"
}
if( nrow(splits) == 0 && nrow(split_adjusted_dividend) == 0 ) {
merged_data <- xts::xts( data.frame( dividend = numeric(),
splits = numeric(),
shares = numeric(),
unadjusted_dividend = numeric() ),
order.by = as.Date( as.character(), tz = 'UTC') )
} else {
just_splits <- all( colnames(splits) %in% split_col )
just_dividends <- all( colnames(split_adjusted_dividend) %in% dividend_col )
div_not_subset_splits <- ! all( zoo::index(split_adjusted_dividend) %in%
zoo::index(splits) )
splits_not_subset_div <- ! all( zoo::index(splits) %in%
zoo::index(split_adjusted_dividend) )
if( splits_not_subset_div && ! just_dividends ) {
stop("unadjust can't fill non-dividend columns when merging with splits")
} else if( div_not_subset_splits && ! just_splits ) {
stop("unadjust can't fill non-split columns when merging with dividends")
} else { #if( div_not_subset_splits || splits_not_subset_div ) {
if( ! identical( zoo::index(splits), zoo::index(split_adjusted_dividend) ) ) {
merged_data <- xts::merge.xts(split_adjusted_dividend, splits, fill = 0)
n <- length(merged_data)
if( ! dividend_col %in% colnames(merged_data) ) {
merged_data <- cbind( xts::xts( data.frame( dividend = rep_len(0, n) ),
order.by = zoo::index(merged_data) ),
merged_data)
}
if( ! split_col %in% colnames(merged_data) ) {
merged_data <- cbind( merged_data,
xts::xts( data.frame( splits = rep_len(1, n) ),
order.by = zoo::index(merged_data) ) )
} else {
merged_data[ merged_data[,split_col] ==0, split_col] <- 1
}
} else {
merged_data <- cbind(split_adjusted_dividend, splits)
}
merged_data$shares <- xts::xts( make_shares(
as.numeric(merged_data[,split_col] ) ),
order.by = zoo::index(merged_data) )
merged_data$unadjusted_dividend <-
xts::xts( unadjust( as.numeric( merged_data[,dividend_col]),
as.numeric( merged_data[,split_col]),
...),
order.by = zoo::index(merged_data) )
}
}
merged_data
}
#' Make additive price adjustments without lookforward bias.
#'
#' Apply dividend and split data to price series without changing
#' past data. Changes are applied additively, perhaps multiplicative
#' will be added in the future. A dividend paid at time t will increase
#' the value at time t and all future times. A split at time t will
#' apply a factor at time t and all future times. The Rawvalue and
#' RawShares will be added.
#'
#' Note that splits and dividends that overlap may not be processed correctly.
#' Splits are processed first, as the price will be reported post-split, so
#' we assume the shares have already split, and the dividend is reported
#' per share. In practice, this may be false, in which case work would need to be done.
#'
#' TODO: Need to make sure it works when there is a split or dividend on a date with no price.
#'
#' @param price_data Price data for which to compute raw value. Currently in a confused state
#' where \code{data.table} and \code{xts} are both accepted.
#' @param dividend Split unadjusted dividends as provided by Yahoo, either as \code{data.table}
#' or \code{xts}.
#' @param splits Splits as provided by Yahoo, either as \code{data.table}
#' or \code{xts}.
#' @param ... Not sure yet.
#'
#' @return A \code{data.table} unless an \code{xts} was passed in containing new
#' columns with dividend, splits, rawshares, rawdividend, and rawvalue.
#'
#' @export
make_raw_value <- function(price_data, splits, dividend, ...) {
symbol <- index <- rawshares <- rawvalue <-
Close <- retroactive_shares <- rawdividend <- NULL
is_xts <- xts::is.xts(price_data)
if( is_xts ) {
xts_attr <- xts::xtsAttributes(price_data)
price <- as.data.table(price_data)
if( needs_symbol(price) ) {
price <- gather_symbol(price)
}
} else {
price <- copy(price_data)
}
has_symbol <- function(x) "symbol" %in% names(x)
has_some <- function(x,field) ! is.null(x) && ! all(is.na(x)) && ! all(is.na(x[,field, with = FALSE]))
if ( has_symbol(price) ) {
merge_key <- c("symbol", "index")
available_symbols <- price[, unique(symbol)]
} else {
merge_key <- c("index")
}
multi_symbol <- has_symbol(price) && length(available_symbols) > 1
if( xts::is.xts(dividend) ) {
dividend <- as.data.table(dividend)
if( needs_symbol(dividend) ) {
dividend <- gather_symbol(dividend)
}
if( "div" %in% names(dividend) ) {
setnames(dividend, "div", "dividend")
}
if( all( names(dividend) %in% c("index", "V1") ) ){
setnames(dividend, "V1", "dividend")
}
}
normalize <- function(x) {
if( is.data.table(x) ) {
if( ! has_symbol(x) ) {
if ( multi_symbol ) {
stop(deparse(substitute(x)),
" must have symbol for multisymbol price data")
} else if ( has_symbol(price)) {
x[, symbol := available_symbols]
}
} else {
multi_symbol <<- multi_symbol || x[,length(unique(symbol)) > 1]
if( multi_symbol && ! has_symbol(price) ) {
stop("price must have symbol for multisymbol ",
deparse(substitute(x)),
" data")
}
}
}
x
}
dividend <- normalize(dividend) # I thought the assignment was unnecessary because
# data.tables are passed by reference, but it wasn't always working??
if( xts::is.xts(splits) ) {
splits <- as.data.table(splits)
if( needs_symbol(splits) ) {
splits <- gather_symbol(splits)
}
if( "spl" %in% names(splits) ) {
setnames(splits, "spl", "split")
}
if( all( names(splits) %in% c("index", "V1") ) ){
setnames(splits, "V1", "split")
}
}
splits <- normalize(splits)
index_class <- class(price[,index])
is_date <- all( class(price[,index]) == "Date" )
if( ! is_date ) {
price[, index := as.Date(index)]
}
setkeyv(price, merge_key)
if( has_some(splits, "split") ) {
splits[, index := as.Date(index)]
setkeyv(splits, merge_key)
price_cols <- names(price)
extra_split_cols <- names(splits)[ !( names(splits) %in% price_cols)]
price <- splits[price, on=merge_key]
setcolorder(price, c(price_cols, extra_split_cols))
# price <- merge( price, splits, by = merge_key, all.x = TRUE)
price[is.na(split), split := 1]
if ( has_symbol(price) ) {
price[, rawshares := 1 / cumprod(split), by = symbol ]
} else {
price[, rawshares := 1 / cumprod(split)]
}
price[, rawvalue := rawshares * Close]
} else {
price[, split := 1]
price[, rawshares := 1]
price[, rawvalue := Close]
}
if( has_some(dividend, "dividend") ) {
dividend[, index := as.Date(index)]
if( has_some(splits, "split") ) {
dividend <- merge(dividend, splits, by = merge_key, all = TRUE)
dividend[is.na(dividend), dividend := 0]
dividend[is.na(split), split := 1]
if( has_symbol(price) ) {
dividend[, rawshares := 1/cumprod(split), by = symbol]
dividend[, retroactive_shares := last(rawshares) / rawshares, by = symbol]
dividend[, rawdividend := retroactive_shares * dividend, by = symbol]
} else {
dividend[, rawshares := 1/cumprod(split)]
dividend[, retroactive_shares := last(rawshares) / rawshares]
dividend[, rawdividend := retroactive_shares * dividend]
}
dividend[, split := NULL]
dividend[, rawshares := NULL]
dividend[, retroactive_shares := NULL] #maybe keep this
} else {
dividend[, rawdividend := dividend]
}
price_cols <- names(price)
extra_div_cols <- names(dividend)[ ! ( names(dividend) %in% price_cols ) ]
setkeyv(dividend, merge_key)
price <- dividend[price, on=merge_key]
setcolorder(price, c(price_cols, extra_div_cols))
# price <- merge(price, dividend, by = merge_key, all.x = TRUE)
price[is.na(dividend), dividend := 0]
price[is.na(rawdividend), rawdividend := 0]
if( has_symbol( price ) ) {
price[, rawvalue := rawvalue + cumsum(rawshares * rawdividend), by = symbol]
} else {
price[, rawvalue := rawvalue + cumsum(rawshares * rawdividend)]
}
} else {
price[, dividend := 0]
price[, rawdividend := 0]
}
if( ! is_date ) {
if ( all( index_class == c("POSIXct", "POSIXt") ) ) {
sorted_attr <- attr(price,"sorted")
price[, index := as.POSIXct( as.character(index) )]
attr(price,"sorted") <- sorted_attr
} else{
stop("index must be POSIXct or Date currently.")
}
}
if( is_xts ) {
price <- xts::as.xts(spread_symbol(price))
xts::xtsAttributes(price) <- xts_attr
}
price
}
#' Compute returns as value change plus dividends.
#'
#' @param ... Additional arguments.
#' @return Return data assuming shares are split according to split data
#' and dividends are paid, one for each share, but not reinvested.
#'
#' @export
make_raw_return <- function(...) UseMethod("make_raw_return")
#' @describeIn make_raw_return
#'
#' Compute raw return from split adjusted shares, dividends, and closing prices.
#'
#' Computes the return of buying the given number of shares at the first closing price
#' and exiting at any intermediate closing price including splits and dividends. The
#' number of shares should only change based on splits; no accounting is done to rebase
#' the return for other purchases and sales of shares. The dividends must be as they would
#' historically have been reported, not split adjusted as Yahoo provides. A dividend on
#' the first tick is not included in the return as Yahoo reports them on the ex-date.
#'
#' @param split_adjusted_shares Numeric vector of number of shares accounting
#' for share splits. Typically, starts with 1 and adjusts for each split to reflect what
#' that 1 share has become.
#' @param split_unadjusted_dividends Numeric vector of dividends as they would
#' have been announced historically. The values returned from Yahoo are split adjusted.
#' For example, a dividend of 0.2 that occured before a 2:1 split would be reported as
#' 0.1 by Yahoo (assuming no subsequent splits). This function expects the 0.2. See
#' \code{\link{unadjust}}.
#' @param price_data The historical prices, usually closes, with no adjustments.
#' @param initial_rawclose Optional starting value to measure returns against. Default is
#' NULL, i.e., just use the first ticks from the other data vectors. If used, the first
#' dividend will be included.
#'
#' @return A numeric vector the same length as the inputs with the arithmetic return from
#' the first close including dividends.
#'
#' @export
make_raw_return.default <- function(price_data, split_adjusted_shares, split_unadjusted_dividends,
initial_rawclose = NULL, ...) {
rawclose <- split_adjusted_shares * price_data
if( is.null(initial_rawclose) ) {
initial_rawclose <- rawclose[1]
paid_dividends <- function(x) c(0, x[-1])
} else {
paid_dividends <- function(x) x
}
(rawclose + cumsum(split_adjusted_shares * paid_dividends(split_unadjusted_dividends) ) -
initial_rawclose)/ initial_rawclose
}
#' @describeIn make_raw_return
#'
#' Compute period returns as value change plus dividends.
#'
#' @param price_data_xts An \code{xts} object with columns \code{Close, rawshares, rawdividend}
#' or with a symbol \code{SYM}, columns \code{SYM.Close, SYM.Rawshares, SYM.Rawdividend}.
#' @param period A string indicating a granularity. See \code{\link[quantmod]{periodReturn}}.
#'
#' @return An \code{xts} object ordered by the \code{index} column granulated to period. The
#' returns are calculated as close to close values multiplied by rawshares to account for
#' splits and adding in dividends over the period. Note that a dividend on date t in Yahoo
#' data is only paid to those holding at the close on date t-1. Therefore, there a dividend
#' on date t for which t is also the period start will be recorded in the period ending on date
#' t, not the next period starting on t. In particular, daily returns include a dividend on
#' date t in the return from t-1 to t, recorded as the return on date t.
#'
#' Note that daily raw returns for one week do not simply convert to weekly returns as prod(1+r).
#' Each day the basis for return is the close, but the return comes from the following close
#' plus dividends. Those dividends are not included in the basis for the following day.
#'
#' @export
make_raw_return.xts <- function(price_data_xts, period = 'monthly', ...) {
index <- NULL
period_opts <- list(daily = "days", weekly = "weeks", monthly = "months",
quarterly = "quarters", yearly = "years", annually = "years")
end_points <- xts::endpoints( index(price_data_xts),
on = period_opts[[period]])
end_points <- end_points[-1] #drop initial 0
if( ! 1 %in% end_points ) end_points <- c(1, end_points)
n <- length(end_points)
price_data_dt <- as.data.table(price_data_xts)
if( needs_symbol(price_data_dt) ) {
price_data_dt <- gather_symbol(price_data_dt)
}
raw_ret_dt <- make_raw_return( price_data_xts,
start = end_points[-n],
end = end_points[-1], ... )
if ( period == 'daily' ) {
initial_zero_to_match_quantmod <- data.table(index = index(price_data_xts[1,]),
rawreturn = 0)
raw_ret <- xts::as.xts( rbind( initial_zero_to_match_quantmod,
raw_ret_dt ) )
} else {
# dt version gives intermediate returns between start and end
# to match quantmod, only return the values at endpoints
# end_points[-1]: remove initial starting point
# -1: indexing is off by 1 because make_raw_return will not have a return on the
# first tick
raw_ret <- xts::as.xts( raw_ret_dt[end_points[-1]-1,] )
}
colnames(raw_ret) <- paste(period, "rawreturn", sep = "_")
raw_ret
}
#' @describeIn make_raw_return
#'
#' Compute raw return between start and end.
#'
#' @param price_data_dt A \code{data.table} of price data as produced by \code{make_raw_data}.
#' In particular, it must have columns \code{close, rawshares, rawdividend}.
#' @param start,end Index vectors (numeric or logical) of starting/ending points.
#' They should be the same length. If logical, they should have the same number
#' of TRUE values.
#' @return A \code{data.table} of raw returns with columns index and rawreturn.
#' If the (start,end] ranges do not overlap, the returns are a running calculation from
#' each start indexed from start+1 to end. If there the (start,end] ranges do not partition
#' the index of the input, the missing indexes are omitted (especially useful when looking
#' at trade returns in which you are not always in the market). If there is overlap in
#' the (start,end] ranges, there is just one return calculated for each end indexed by the
#' index at end.
#'
#' @export
make_raw_return.data.table <- function(price_data_dt, start = 1, end = nrow(price_data_dt), ...) {
n <- nrow(price_data_dt)
period_index <- index <- on_period <- Close <- rawshares <-
rawdividend <- rawreturn <- NULL
if( xts::timeBased(end) ) {
end_index <- price_data_dt[, which(as.IDate(index) %in% as.IDate(end)) ]
} else {
end_index <- end
}
if( xts::timeBased(start) ) {
start_index <- price_data_dt[, which(as.IDate(index) %in% as.IDate(start)) ]
} else {
start_index <- start
}
n_intervals <- length(start_index)
price_data_dt[, period_index := 0]
price_data_dt[end_index, period_index := 1]
price_data_dt[start_index, period_index := period_index + 1]
price_data_dt[, period_index := c(0, cumsum( period_index)[-n]) ]
price_data_dt[, on_period := period_index %in% period_index[end_index]]
initial_rawclose <- price_data_dt[start_index, Close * rawshares]
overlap <- any( start_index[-1] < end_index[-n_intervals])
if( overlap ) {
overlapped_dividends <- numeric(length(initial_rawclose))
for( idx in seq_along(start_index)) {
overlapped_dividends[idx] <- price_data_dt[(start_index[idx]+1):end_index[idx],
sum(rawdividend * rawshares / last(rawshares) )]
}
raw_ret <- price_data_dt[(on_period),
list(rawreturn = make_raw_return(last(Close),
last(rawshares),
overlapped_dividends[.GRP],
initial_rawclose[.GRP],...) ),
by = period_index]
} else {
raw_ret <- price_data_dt[(on_period),
list(rawreturn = make_raw_return(Close,
rawshares,
rawdividend,
initial_rawclose[.GRP],...)),
by = period_index]
}
ret <- data.table( index = price_data_dt[(on_period), index],
rawreturn = raw_ret[,rawreturn])
price_data_dt[, period_index := NULL]
price_data_dt[, on_period := NULL]
ret
# Dividends are paid to the holder on the previous day in Yahoo data.
# Therefore, they need to be offset by 1 as to which period they are
# paid in. A dividend on the initial day should not be included because
# it would be paid in the previous period. If the initial period is partial,
# I suppose it should be included, but currently it's not. The first period
# isn't perfect anyway because we don't have the true starting price.
}
#' Compute shares held based on splits and reinvested dividends.
#'
#' For a sequence of closing prices, split adjusted shares, and split unadjusted dividends,
#' (i.e., close, raw_shares, and raw_dividends), compute the number or shares held if the
#' dividends are reinvested.
#'
#' This function assumes that a dividend at tick t is paid to the stock owners at tick t-1 (i.e.,
#' t is the ex-date in accordance with how Yahoo reports dividends). Therefore, a dividend
#' on the initial tick is assumed not to be received.
#'
#' Per the ordinary dividends received on mutual funds in my TD Ameritrade account, the
#' reinvestment of a dividend on date t is done at a trade price of the close on date t-1.
#' This may be a bad assumption in other cases. And in yet other cases, it is not
#' practical to reinvest your dividends at all (stocks in my TD Ameritrade account). If
#' your account is big enough to reinvest dividends, this function may need to be
#' modified to match your reinvestment procedure. Also for large accounts, you may
#' need to find share buyback data to accurately reflect other ways cash will be returned
#' to you (not handled in this function).
#'
#' @param close A numeric vector of closing prices, not adjusted for splits or dividends.
#' @param split_adjusted_shares A numeric vector of shares held. The shares should already
#' account for splits (e.g., if you start with 1 share and there is a 2:1 split, you should
#' have 2 shares after the split).
#' @param unadjusted_dividends A numeric vector of dividends paid. These should not be adjusted
#' for splits. Yahoo adjusts their dividends for splits. The make_raw_value function can create
#' raw dividends (i.e., undo the split adjustment).
#'
#' @return A numeric vector of shares after splits and reinvestment of dividends.
#'
#' @export
make_reinvested_shares <- function(close, split_adjusted_shares, unadjusted_dividends) {
split_adjusted_shares*cumprod(1+unadjusted_dividends/c(Inf,close[-length(close)]))
}
#' Compute period returns assuming reinvestment of dividends.
#'
#' @param price_data Price data on which to compute returns
#' @param ... Additional arguments to pass to specializations.
#'
#' @return Return data calculated as close to close values multiplied by rawshares to account for
#' splits and adding in dividends over the period. Note that a dividend on date t in Yahoo
#' data is only paid to those holding at the close on date t-1. Therefore, there a dividend
#' on date t for which t is also the period start will be recorded in the period ending on date
#' t, not the next period starting on t. In particular, daily returns include a dividend on
#' date t in the return from t-1 to t, recorded as the return on date t.
#'
#' Note that daily raw returns for one week do not simply convert to weekly returns as prod(1+r).
#' Each day the basis for return is the close, but the return comes from the following close
#' plus dividends. Those dividends are not included in the basis for the following day.
#'
#' @export
make_reinvested_return <- function(price_data, ...) UseMethod("make_reinvested_return")
#' Compute reinvested return from split adjusted shares, dividends, and closing prices.
#'
#' Computes the return of buying the given number of shares at the first closing price
#' and exiting at any intermediate closing price including splits and dividends. The
#' number of shares will change based on splits and reinvesting dividends;
#' no accounting is done to rebase
#' the return for other purchases and sales of shares. The dividends must be as they would
#' historically have been reported, not split adjusted as Yahoo provides. A dividend on
#' the first tick is not included in the return as Yahoo reports them on the ex-date.
#'
#' @param price_data The historical price data with no adjustments.
#' @param split_adjusted_shares Numeric vector of number of shares accounting
#' for share splits. Typically, starts with 1 and adjusts for each split to reflect what
#' that 1 share has become.
#' @param unadjusted_dividends Numeric vector of dividends as they would
#' have been announced historically. The values returned from Yahoo are split adjusted.
#' For example, a dividend of 0.2 that occured before a 2:1 split would be reported as
#' 0.1 by Yahoo (assuming no subsequent splits). This function expects the 0.2. Available
#' as \code{rawdividend} as returned by \code{make_raw_value}.
#' @param initial_price Optional starting price to measure returns against. Default is
#' NULL, i.e., just use the first tick from the \code{price_data} vector. If used, the first
#' dividend will be included and initial_shares must be provided.
#' @param initial_shares Optional starting shares to measure returns against. Default is NULL,
#' i.e., just use the first tick in the split_adjusted_shares vector. If used, the
#' initial_close must be provided.
#' @param ... Additional arguments currently unused.
#'
#' @return A numeric vector the same length as the inputs with the arithmetic return from
#' the first close assuming dividends are reinvested in shares per make_reinvested_shares.
#'
#' @export
make_reinvested_return.default <- function(price_data, split_adjusted_shares, unadjusted_dividends,
initial_price = NULL, initial_shares = NULL, ...) {
if( is.null(initial_price) && is.null(initial_shares) ) {
reinvested_shares <- make_reinvested_shares( price_data, split_adjusted_shares, unadjusted_dividends)
initial_value <- reinvested_shares[1] * price_data[1]
} else {
reinvested_shares <- make_reinvested_shares( c(initial_price, price_data),
c(initial_shares, split_adjusted_shares),
c(0, unadjusted_dividends) )[-1]
initial_value <- initial_shares * initial_price
}
(price_data*reinvested_shares - initial_value ) / initial_value
}
#' Compute reinvested return between start and end.
#'
#' @param price_data A data.frame of prices as produced by make_raw_data. In particular,
#' requires columns index, close, rawshares, and rawdividend.
#' @param start An index vector (numeric or logical) of starting points.
#' @param end An index vector (numeric or logical) of ending poitns.
#' @param ... Additional arguments.
#' @return A data.frame of reinvested returns with columns index and reinvested_return.
#' If the (start,end] ranges do not overlap, the returns are a running calculation from
#' each start indexed from start+1 to end. If there the (start,end] ranges do not partition
#' the index of the input, the missing indexes are omitted (especially useful when looking
#' at trade returns in which you are not always in the market). If there is overlap in
#' the (start,end] ranges, there is just one return calculated for each end indexed by the
#' index at end.
#'
#' @export
make_reinvested_return.data.table <- function(price_data, start = 1, end = nrow(price_data), ...) {
n <- nrow(price_data)
index <- period_index <- on_period <- Close <- rawshares <-
rawdividend <- reinvested_return <- NULL
if( xts::timeBased(end) ) {
end_index <- price_data[, which(as.IDate(index) %in% as.IDate(end)) ]
} else {
end_index <- end
}
if( xts::timeBased(start) ) {
start_index <- price_data[, which(as.IDate(index) %in% as.IDate(start)) ]
} else {
start_index <- start
}
n_intervals <- length(start_index)
price_data[, period_index := 0]
price_data[end_index, period_index := 1]
price_data[start_index, period_index := period_index + 1]
price_data[, period_index := c(0, cumsum( period_index)[-n]) ]
price_data[, on_period := period_index %in% period_index[end_index]]
initial_close <- price_data[start_index, Close]
initial_shares <- price_data[start_index, rawshares]
overlap <- any( start_index[-1] < end_index[-n_intervals])
if( overlap ) {
# overlapped_shares <- numeric(length(initial_rawclose))
overlapped_dividend <- numeric(length(initial_close))
for( idx in seq_along(start_index)) {
# overlapped_shares[idx] <- price_data[start_index[idx]:end_index[idx],
# make_reinvested_shares( close, rawshares, rawdividend ) ]
overlapped_dividend[idx] <- price_data[start_index[idx]:end_index[idx],
( last( make_reinvested_shares( Close, rawshares, rawdividend) ) /
last(rawshares) - 1 ) *
initial_close[idx] ]
}
# overlapped_dividend <- overlapped_shares
ret_data <- price_data[(on_period), list(reinvested_return =
make_reinvested_return(last(Close),
last(rawshares),
overlapped_dividend[.GRP],
initial_close[.GRP],
initial_shares[.GRP],...) ),
by = period_index]
} else {
ret_data <- price_data[(on_period),
list(reinvested_return = make_reinvested_return(Close,
rawshares,
rawdividend,
initial_close[.GRP],
initial_shares[.GRP],...)),
by = period_index]
}
ret <- data.table( index = price_data[(on_period), index],
reinvested_return = ret_data[,reinvested_return])
price_data[, period_index := NULL]
price_data[, on_period := NULL]
ret
# Dividends are paid to the holder on the previous day in Yahoo data.
# Therefore, they need to be offset by 1 as to which period they are
# paid in. A dividend on the initial day should not be included because
# it would be paid in the previous period. If the initial period is partial,
# I suppose it should be included, but currently it's not. The first period
# isn't perfect anyway because we don't have the true starting price.
}
#' Compute period returns by reinvesting within period dividends.
#'
#' @param price_data An \code{xts} object with columns \code{Close, rawshares, rawdividend}
#' or with a symbol \code{SYM}, columns \code{SYM.Close, SYM.Rawshares, SYM.Rawdividend}.
#' @param period A string indicating a granularity. See quantmod::periodReturn.
#' @param ... Additional arguments.
#'
#' @return An \code{xts} object ordered by the \code{index} column granulated to period. The
#' returns are calculated as close to close values multiplied by rawshares to account for
#' splits and adding in dividends over the period. Note that a dividend on date t in Yahoo
#' data is only paid to those holding at the close on date t-1. Therefore, there a dividend
#' on date t for which t is also the period start will be recorded in the period ending on date
#' t, not the next period starting on t. In particular, daily returns include a dividend on
#' date t in the return from t-1 to t, recorded as the return on date t.
#'
#' Note that daily raw returns for one week do not simply convert to weekly returns as prod(1+r).
#' Each day the basis for return is the close, but the return comes from the following close
#' plus dividends. Those dividends are not included in the basis for the following day.
#'
#' @export
make_reinvested_return.xts <- function(price_data, period = 'monthly', ...) {
index <- NULL
period_opts <- list(daily = "days", weekly = "weeks", monthly = "months",
quarterly = "quarters", yearly = "years", annually = "years")
end_points <- xts::endpoints( index(price_data),
on = period_opts[[period]])
end_points <- end_points[-1] #drop initial 0
if( ! 1 %in% end_points ) end_points <- c(1, end_points)
n <- length(end_points)
price_dt <- as.data.table(price_data)
if( needs_symbol(price_dt) ) {
price_dt <- gather_symbol(price_dt)
}
reinvested_ret_dt <- make_reinvested_return( price_dt,
start = end_points[-n],
end = end_points[-1], ... )
if ( period == 'daily' ) {
initial_zero_to_match_quantmod <- data.table(index = index(price_data[1,]),
reinvested_return = 0)
reinvested_ret <- xts::as.xts( rbind( initial_zero_to_match_quantmod,
reinvested_ret_dt ) )
} else {
# dt version gives intermediate returns between start and end
# to match quantmod, only return the values at endpoints
# end_points[-1]: remove initial starting point
# -1: indexing is off by 1 because make_reinvested_return will not have a return on the
# first tick
reinvested_ret <- xts::as.xts( reinvested_ret_dt[end_points[-1]-1,] )
}
colnames(reinvested_ret) <- paste(period, "reinvested_return", sep = "_")
reinvested_ret
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.