adjustr: Adjust Stock Price Data for Splits and Dividends

Documented in check_update getDTSymbols make_raw_return make_raw_return.data.table make_raw_return.default make_raw_return.xts make_raw_value make_reinvested_return make_reinvested_return.data.table make_reinvested_return.default make_reinvested_return.xts make_reinvested_shares make_shares unadjust unadjust.data.table unadjust.default unadjust.xts

#' @import data.table
NULL

#' Unsplit the Close from an xts
#' 
#' \code{remove_autosplit} allows \code{quantmod::getSymbols.yahoo} to
#' return closes that are not split adjusted. There is no change
#' in \code{quantmod}, but Yahoo has changed what they return.
#' 
#' @param prices  An xts with a Close as in quantmod.
#' @param splits  An xts with the splits for the prices.
#' 
#' @return The xts with the splits undone.
#' 
#' @export
remove_autosplit <- function(prices, splits) {
  all_dates <- unique(sort(c(zoo::index(prices), zoo::index(splits))))
  n_dates <- length(all_dates)
  shares <- xts::xts(rep(1, n_dates), order.by=all_dates)
  shares[zoo::index(splits),] <- 1/splits
  shares <- stats::lag(shares, k=-1)
  shares[n_dates,] <- 1 # lag introduces NA at most recent date
  cumshares <- xts::xts( rev( cumprod( rev( drop( zoo::coredata(shares) ) ) ) ),
                         order.by = zoo::index(shares))
  close_col <- grep("close", colnames(prices), ignore.case=TRUE)
  unsplit_prices <- prices
  unsplit_prices[,close_col] <- prices[,close_col] * cumshares[zoo::index(prices),]
  unsplit_prices
}

clean_duplicate_dates <- function(x) {
  `%>%` <- magrittr::`%>%`
  dates <- x %>% as.data.frame %>% row.names %>% as.Date
  with_dates <- x %>% as.data.frame %>% dplyr::mutate(Date=dates)
  Date <- n <- NULL
  date_count <- with_dates %>% dplyr::count(Date)
  duplicates <- (date_count %>% dplyr::pull(n)) > 1
  if( any(duplicates) ) {
    warning(paste0("Multiple rows for dates ", date_count %>% dplyr::filter(duplicates)))
  }
  y <- with_dates %>%
    dplyr::group_by(Date) %>%
    dplyr::slice(dplyr::n()) %>%
    dplyr::ungroup()
  xts::as.xts(dplyr::select(y, -Date), order.by=y[["Date"]])
}
needs_symbol <- function(x) any( grepl(".", names(x), fixed=TRUE) )

#' Retrieve stock prices as data.table
#' 
#' \code{getDTSymbols} adapts \code{quantmod::getSymbols} to convert
#' results to \code{data.table}.
#' 
#' @param x A symbol or list of symbols to retrieve from Yahoo and adjust.
#' @param ... Additional arguments passed to quantmod::getSymbols.
#' @param cache  Whether to check cache for data before calling quantmod::getSymbols.
#' 
#' @return A \code{data.table} of data for input symbols with adjustments.
#' 
#' @export
getDTSymbols <- function(x, ..., cache=TRUE) {
  argg <- list(...)
  get_arg <- function(var) {
    expr <- substitute(hasArg(y), list(y=var))
    if( eval(expr, parent.frame()) ) {
      value <- argg[[var]]
    } else {
      value <- formals(quantmod::getSymbols.yahoo)[[var]] # TODO move away from yahoo default
    }
    eval(value)
  }
  start_date <- get_arg("from")
  end_date <- get_arg("to")
  
  index <- NULL
  results <- list()
  for( symbol in x ) {
    cache_file <- get_cache_file(symbol, start_date)
    found_end_date <- as.Date("1000-01-01")
    cache_exists <- file.exists(cache_file)
    if( cache_exists ) {
      data <- load_cache(cache_file)
      found_end_date <- data[, max(index)] + 1 # yahoo end date is non-inclusive
    } 
    if( found_end_date < end_date ) {
      getSymbols <- quantmod::getSymbols # getSymbols doesn't expect to see the 
      # package name when it retrieves its defaults (gives a warning).
      # Do this rather than importing getSymbols.
      price <- getSymbols(symbol, ...)
      splits <- quantmod::getSplits(symbol, ...)
      price <- remove_autosplit(price, splits) # Yahoo has started returning split-adjusted closes
      price <- clean_duplicate_dates(price)
      dividends <- quantmod::getDividends(symbol, ...)
      raw <- make_raw_value(price, splits, dividends)
      new_data <- gather_symbol( as.data.table(raw) )
      setkey(new_data, symbol, index)
      if( nrow(new_data) > 0 ) {
        if( cache && cache_exists )
          check_update(data, new_data)
        if( cache ) 
          save_cache(new_data, cache_file)
        data <- new_data
        found_end_date <- data[, max(index)] + 1 # yahoo end date is non-inclusive
      }
      if( found_end_date < end_date ) {
        warning(symbol, " data stops at ", found_end_date)
      }
    }
    results[[symbol]] <- data
  }
  rbindlist(results)
}

#' Compare whether two data sets agree on overlap.
#' 
#' \code{check_update} compares the loaded data for two data sets to 
#' make sure that the second is an update of the first. Throws an error
#' if the is something nontrivial in the update.
#' 
#' @param old  Older, presumably smaller set of data.
#' @param new  Newer, presumably bigger set of data.
#' 
#' @return TRUE if the sets agree on the overlap.
check_update <- function(old, new) {
  Close <- i.Close <- i.split <- rawvalue  <- i.rawvalue <- rawshares <-
    retroactive_shares <- dividend <- i.dividend <- NULL
  DIVIDEND_PRECISION <- 1e-12
  stopifnot( key(old) == key(new))
  overlap <- new[old, on=key(old)]
  # cols_to_match <- c("Close", "split")
  mismatch <- overlap[, which(Close != i.Close | split != i.split | rawvalue != i.rawvalue)]
  # Now check dividend, which may be adjusted differently if there
  # are splits in new that are not in old
  # rawshares <- overlap[, 1/cumprod(split)]
  dividends <- new[, c(key(new), "dividend"), with=FALSE]
  dividends[, rawshares := new[, 1/cumprod(split)] ]
  dividends[, retroactive_shares := last(rawshares) / rawshares]
  raw_dividends <- dividends[old, retroactive_shares*dividend, on=key(old)]
  old_rawshares <- overlap[, 1/cumprod(i.split)]
  old_retroactive_shares <- last(old_rawshares) / old_rawshares
  readjusted_dividends <- raw_dividends / old_retroactive_shares
  dividend_mismatch <- which( abs( readjusted_dividends - overlap[, i.dividend]) > DIVIDEND_PRECISION)
  last_old_index <- nrow(overlap)
  # Exclude mismatches if the last line of the cache didn't get the dividend at the time of creation
  if( last_old_index %in% dividend_mismatch && #  dividend mismatch
      last_old_index %in% mismatch ) { # rawvalue mismatch
    last_old <- overlap[last_old_index,]
    if( last_old[,i.dividend==0 && abs(rawvalue - rawshares * dividend - i.rawvalue) < DIVIDEND_PRECISION ] ) {
      key_str <- vapply(key(old), function(key_col) {
        as.character(last_old[[ key_col]])
      }, "") # want to get date string formatted
      warning_message <- paste0("Cache was missing dividend on ",
                                paste(key_str, collapse=", "),
                                ". Raw value increased from ",
                                last_old[,i.rawvalue],
                                " to ",
                                last_old[,rawvalue],
                                ".")
      warning(warning_message)
      mismatch <- setdiff(mismatch, last_old_index)
      dividend_mismatch <- setdiff(dividend_mismatch, last_old_index)
    }
  }
  total_mismatch <- sort( c( mismatch, dividend_mismatch) )
  if( length(total_mismatch) > 0 ) {
    first_mismatch <- overlap[total_mismatch[1], key(old), with=FALSE]
    problems <- rbind( old[first_mismatch][, version := "cached"], new[first_mismatch][, version := "new"])
    problems_str <- paste0("\n", paste(utils::capture.output(print(problems)), collapse="\n"))
    stop("New data is not an update of old data. Check cache via load_cache vs. getDTSymbols(...,cache=FALSE): ", 
         problems_str)
  }
  TRUE
}

#' Turn splits into evolution of single share.
#' 
#' @param splits A numeric vector of splits. Most values will be 1. A 2:1 split
#' is represented as 0.5. 
#' @return A numeric vector of shares held if splits are applied to an
#' initial position of 1 share. 
#' 
#' @export
make_shares <- function(splits) {
  cumprod(1/splits)
}

#' Turn split adjusted dividends into unadjusted dividends.
#' 
#' @param split_adjusted_dividend Split adjusted dividends for a stock.
#' @param splits Stock split data.
#' @param ... Additional arguments.
#' @export
unadjust <- function(split_adjusted_dividend, splits, ...) UseMethod('unadjust')

#' Turn split adjusted dividends into unadjusted dividends.
#' 
#' Assumes that unadjusted dividends are only accurate to 3 decimal places
#' unless \code{max_decimals} specifies otherwise.
#' 
#' @param split_adjusted_dividend A numeric vector of split adjusted dividends.
#' @param splits A numeric vector of splits.
#' @param ... Additional arguments.
#' @param max_decimals The number of decimal places that unadjusted dividends may have.
#' @return A numeric vector of unadjusted dividends.
#' 
#' @export
unadjust.default <- function(split_adjusted_dividend, splits,..., max_decimals = 3) {
  n <- length(splits)
  stopifnot( n == length(split_adjusted_dividend) )
  shares <- make_shares(splits)
  round(split_adjusted_dividend * shares[n] / shares, digits = max_decimals)
}

#' Turn split adjusted dividends into unadjusted dividends.
#' 
#' Assumes that unadjusted dividends are only accurate to 3 decimal places
#' unless \code{max_decimals} specifies otherwise.
#' 
#' @param split_adjusted_dividend A \code{data.table} with a \code{dividend} column and
#' an \code{index} column to merge on. 
#' @param splits A \code{data.table} with columns \code{splits} and \code{index}.
#' @param ... Additional arguments passed to \code{unadjust.default}, for example, \code{max_decimals}.
#' @return A \code{data.table} of merged splits and dividends with new fields \code{shares}
#' and \code{unadjusted_dividend}. The merge matches on the \code{index} field. Missing
#  splits are filled in with 1. Missing dividends are filled in with 0.
#' 
#' @export
unadjust.data.table <- function(split_adjusted_dividend, splits, ...) {
  stopifnot( is.data.table(splits), 
             all( c('index', 'dividend') %in% names(split_adjusted_dividend) ),
             all( c('index', 'splits') %in% names(splits) ) )
  index <- dividend <- shares <- unadjusted_dividend <- NULL
  just_splits <- all( names(splits) %in% c('index', 'splits') )
  just_dividends <- all( names(split_adjusted_dividend) %in% c('index', 'dividend') )
  
  div_not_subset_splits <- ! all( split_adjusted_dividend[, index] %in% splits[, index] )
  splits_not_subset_div <- ! all( splits[,index] %in% split_adjusted_dividend[, index] )
  
  if( splits_not_subset_div && ! just_dividends ) {
    stop("unadjust can't fill non-dividend columns when merging with splits")
  } else if( div_not_subset_splits && ! just_splits ) {
    stop("unadjust can't fill non-split columns when merging with dividends")
  } else  { #if( div_not_subset_splits || splits_not_subset_div ) {
    merged_data <- merge(split_adjusted_dividend, splits, by = 'index', all = TRUE)
    merged_data[ is.na(splits), splits := 1]
    merged_data[ is.na(dividend), dividend := 0]
  } 
#   else { # splits and dividends must have the same index 
#     merged_data
#   }
  merged_data[, shares := make_shares(splits)]
  merged_data[, unadjusted_dividend := unadjust(dividend, splits, ...)]
}

#' Turn split adjusted dividends into unadjusted dividends.
#' 
#' Assumes that unadjusted dividends are only accurate to 3 decimal places
#' unless \code{max_decimals} specifies otherwise.
#' 
#' @param split_adjusted_dividend An \code{xts} of dividends, either 1 column or with a column name
#' matching dividend.
#' @param splits An \code{xts} of splits, either 1 column or with a column \code{spl}.
#' @param ... Additional arguments passed to unadjust.default, for example, \code{max_decimals}.
#' @return An \code{xts} of merged splits and dividends with new fields \code{shares}
#' and \code{unadjusted_dividend}. Missing splits are filled in with 1. Missing dividends are filled in with 0.
#' 
#' @export
unadjust.xts <- function(split_adjusted_dividend, splits, ...) {
  if( all(is.na(splits)) ) { #NA to support quantmod::getSplits 
    splits <- xts::xts( numeric(), as.Date( as.character(), tz = 'UTC'))
  }
  stopifnot( xts::is.xts(splits), 
             ncol(split_adjusted_dividend) ==1 || 
               any(grepl("dividend", colnames(split_adjusted_dividend),
                         ignore.case = TRUE)),
             ncol(splits) == 1 ||
               any( grepl("spl", colnames(splits), ignore.case = TRUE ) ) )
  if( ncol(splits) == 1 ) {
    split_col <- colnames(splits) 
  } else {
    split_col <- colnames(splits)[ grepl("spl", colnames(splits), ignore.case = TRUE)]
  }
  if( ncol(split_adjusted_dividend) == 1) {
    dividend_col <- colnames(split_adjusted_dividend)
  } else {
    dividend_col <- colnames(split_adjusted_dividend)[ grepl("dividend", 
                                          colnames(split_adjusted_dividend), 
                                          ignore.case = TRUE)]
  }
  if( is.null(dividend_col) ) { #the case for quantmod::getDividends
    colnames(split_adjusted_dividend) <- "dividend"
    dividend_col <- "dividend"
  }
  if( is.null(split_col) ) { #not a real issue, but to fully support 1 column split xts
    colnames(splits) <- "splits"
    split_col <- "splits"
  }
  if( nrow(splits) == 0 && nrow(split_adjusted_dividend) == 0 ) {
    merged_data <- xts::xts( data.frame( dividend = numeric(),
                                         splits = numeric(),
                                         shares = numeric(),
                                         unadjusted_dividend = numeric() ),
                             order.by = as.Date( as.character(), tz = 'UTC') )
  } else {
    just_splits <- all( colnames(splits) %in% split_col )
    just_dividends <- all( colnames(split_adjusted_dividend) %in% dividend_col )
    
    div_not_subset_splits <- ! all( zoo::index(split_adjusted_dividend) %in% 
                                      zoo::index(splits) )
    splits_not_subset_div <- ! all( zoo::index(splits) %in% 
                                      zoo::index(split_adjusted_dividend) )
    
    if( splits_not_subset_div && ! just_dividends ) {
      stop("unadjust can't fill non-dividend columns when merging with splits")
    } else if( div_not_subset_splits && ! just_splits ) {
      stop("unadjust can't fill non-split columns when merging with dividends")
    } else  { #if( div_not_subset_splits || splits_not_subset_div ) {
        
      if( ! identical( zoo::index(splits), zoo::index(split_adjusted_dividend) ) ) {
        merged_data <- xts::merge.xts(split_adjusted_dividend, splits, fill = 0)
        n <- length(merged_data)
        if( ! dividend_col %in% colnames(merged_data) ) {
          merged_data <- cbind( xts::xts( data.frame( dividend = rep_len(0, n) ),
                                          order.by = zoo::index(merged_data) ),
                                merged_data)
        }
        if( ! split_col %in% colnames(merged_data) ) {
          merged_data <- cbind( merged_data,
                                xts::xts( data.frame( splits = rep_len(1, n) ),
                                          order.by = zoo::index(merged_data) ) )
        } else {
          merged_data[ merged_data[,split_col] ==0, split_col] <- 1
        }
      } else {
        merged_data <- cbind(split_adjusted_dividend, splits)
      }
      merged_data$shares <- xts::xts( make_shares( 
                                        as.numeric(merged_data[,split_col] ) ),
                                      order.by = zoo::index(merged_data) )
      merged_data$unadjusted_dividend <- 
        xts::xts( unadjust( as.numeric( merged_data[,dividend_col]), 
                            as.numeric( merged_data[,split_col]), 
                            ...),
                  order.by = zoo::index(merged_data) )
    }
  }
  merged_data
}

#' Make additive price adjustments without lookforward bias.
#' 
#' Apply dividend and split data to price series without changing
#' past data. Changes are applied additively, perhaps multiplicative 
#' will be added in the future. A dividend paid at time t will increase
#' the value at time t and all future times. A split at time t will 
#' apply a factor at time t and all future times. The Rawvalue and
#' RawShares will be added.
#' 
#' Note that splits and dividends that overlap may not be processed correctly.
#' Splits are processed first, as the price will be reported post-split, so
#' we assume the shares have already split, and the dividend is reported 
#' per share. In practice, this may be false, in which case work would need to be done.
#' 
#' TODO: Need to make sure it works when there is a split or dividend on a date with no price.
#' 
#' @param price_data Price data for which to compute raw value. Currently in a confused state
#'     where \code{data.table} and \code{xts} are both accepted.
#' @param dividend Split unadjusted dividends as provided by Yahoo, either as \code{data.table}
#'     or \code{xts}.
#' @param splits Splits as provided by Yahoo, either as \code{data.table}
#'     or \code{xts}.
#' @param ... Not sure yet.
#'     
#' @return A \code{data.table} unless an \code{xts} was passed in containing new
#'     columns with dividend, splits, rawshares, rawdividend, and rawvalue.
#'     
#' @export 
make_raw_value <- function(price_data, splits, dividend, ...) {
  symbol <- index <- rawshares <- rawvalue <- 
    Close <- retroactive_shares <- rawdividend <- NULL
  is_xts <- xts::is.xts(price_data)
  if( is_xts ) {
    xts_attr <- xts::xtsAttributes(price_data)
    price <- as.data.table(price_data)
    if( needs_symbol(price) ) {
      price <- gather_symbol(price)
    }
  } else {
    price <- copy(price_data)
  }
  has_symbol <- function(x) "symbol" %in% names(x)
  has_some <- function(x,field) ! is.null(x) && ! all(is.na(x)) && ! all(is.na(x[,field, with = FALSE]))
  if ( has_symbol(price) ) {
    merge_key <- c("symbol", "index")
    available_symbols <- price[, unique(symbol)]
  } else {
    merge_key <- c("index")
  }
  multi_symbol <- has_symbol(price) && length(available_symbols) > 1
  if( xts::is.xts(dividend) ) {
    dividend <- as.data.table(dividend)
    if( needs_symbol(dividend) ) {
      dividend <- gather_symbol(dividend)
    }
    if( "div" %in% names(dividend) ) {
      setnames(dividend, "div", "dividend")
    }
    if( all( names(dividend) %in% c("index", "V1") ) ){
      setnames(dividend, "V1", "dividend")
    }
  }
  normalize <- function(x) {
    if( is.data.table(x) ) {
      if( ! has_symbol(x) ) {
        if ( multi_symbol ) {
          stop(deparse(substitute(x)),
               " must have symbol for multisymbol price data")
        } else if ( has_symbol(price)) {
          x[, symbol := available_symbols]
        }
      } else {
        multi_symbol <<- multi_symbol || x[,length(unique(symbol)) > 1]
        if( multi_symbol && ! has_symbol(price) ) {
          stop("price must have symbol for multisymbol ",
               deparse(substitute(x)),
               " data")
        }
      }
    }
    x
  }
  dividend <- normalize(dividend) # I thought the assignment was unnecessary because
  # data.tables are passed by reference, but it wasn't always working??
  if( xts::is.xts(splits) ) {
    splits <- as.data.table(splits)
    if( needs_symbol(splits) ) {
      splits <- gather_symbol(splits)
    }
    if( "spl" %in% names(splits) ) {
      setnames(splits, "spl", "split")
    }
    if( all( names(splits) %in% c("index", "V1") ) ){
      setnames(splits, "V1", "split")
    }
  }
  splits <- normalize(splits)
  index_class <- class(price[,index])
  is_date <- all( class(price[,index]) == "Date" )
  if( ! is_date ) {
    price[, index := as.Date(index)]
  }
  setkeyv(price, merge_key)
  if( has_some(splits, "split") ) {
    splits[, index := as.Date(index)]
    setkeyv(splits, merge_key)
    price_cols <- names(price)
    extra_split_cols <- names(splits)[ !( names(splits) %in% price_cols)]
    price <- splits[price, on=merge_key]
    setcolorder(price, c(price_cols, extra_split_cols))
    # price <- merge( price, splits, by = merge_key, all.x = TRUE)
    price[is.na(split), split := 1]
    if ( has_symbol(price) ) {
      price[, rawshares := 1 / cumprod(split), by = symbol ]
    } else {
      price[, rawshares := 1 / cumprod(split)]
    }
    price[, rawvalue := rawshares * Close]
  } else {
    price[, split := 1]
    price[, rawshares := 1]
    price[, rawvalue := Close]
  }
  if( has_some(dividend, "dividend") ) {
    dividend[, index := as.Date(index)]
    if( has_some(splits, "split") ) {
      dividend <- merge(dividend, splits, by = merge_key, all = TRUE)
      dividend[is.na(dividend), dividend := 0]
      dividend[is.na(split), split := 1]
      if( has_symbol(price) ) {
        dividend[, rawshares := 1/cumprod(split), by = symbol]
        dividend[, retroactive_shares := last(rawshares) / rawshares, by = symbol]
        dividend[, rawdividend := retroactive_shares * dividend, by = symbol]
      } else {
        dividend[, rawshares := 1/cumprod(split)]
        dividend[, retroactive_shares := last(rawshares) / rawshares]
        dividend[, rawdividend := retroactive_shares * dividend]
      }
      dividend[, split := NULL]
      dividend[, rawshares := NULL] 
      dividend[, retroactive_shares := NULL] #maybe keep this
    } else {
      dividend[, rawdividend := dividend]
    }
    price_cols <- names(price)
    extra_div_cols <- names(dividend)[ ! ( names(dividend) %in% price_cols ) ]
    setkeyv(dividend, merge_key)
    price <- dividend[price, on=merge_key]
    setcolorder(price, c(price_cols, extra_div_cols))
    # price <- merge(price, dividend, by = merge_key, all.x = TRUE)
    price[is.na(dividend), dividend := 0]
    price[is.na(rawdividend), rawdividend := 0]
    if( has_symbol( price ) ) {
      price[, rawvalue := rawvalue + cumsum(rawshares * rawdividend), by = symbol]
    } else {
      price[, rawvalue := rawvalue + cumsum(rawshares * rawdividend)]
    }
  } else {
    price[, dividend := 0]
    price[, rawdividend := 0]
  }
  if( ! is_date ) {
    if ( all( index_class == c("POSIXct", "POSIXt") ) ) {
      sorted_attr <- attr(price,"sorted")
      price[, index := as.POSIXct( as.character(index) )]
      attr(price,"sorted") <- sorted_attr
    } else{
      stop("index must be POSIXct or Date currently.")
    }
  }
  if( is_xts ) {
    price <- xts::as.xts(spread_symbol(price))
    xts::xtsAttributes(price) <- xts_attr
  }
  price
}

#' Compute returns as value change plus dividends.
#' 
#' @param ... Additional arguments.
#' @return Return data assuming shares are split according to split data
#'     and dividends are paid, one for each share, but not reinvested.
#'     
#' @export
make_raw_return <- function(...) UseMethod("make_raw_return")

#' @describeIn make_raw_return
#' 
#' Compute raw return from split adjusted shares, dividends, and closing prices.
#' 
#' Computes the return of buying the given number of shares at the first closing price
#' and exiting at any intermediate closing price including splits and dividends. The
#' number of shares should only change based on splits; no accounting is done to rebase
#' the return for other purchases and sales of shares. The dividends must be as they would
#' historically have been reported, not split adjusted as Yahoo provides. A dividend on 
#' the first tick is not included in the return as Yahoo reports them on the ex-date. 
#' 
#' @param split_adjusted_shares Numeric vector of number of shares accounting
#'   for share splits. Typically, starts with 1 and adjusts for each split to reflect what
#'   that 1 share has become.
#' @param split_unadjusted_dividends Numeric vector of dividends as they would 
#'   have been announced historically. The values returned from Yahoo are split adjusted. 
#'   For example, a dividend of 0.2 that occured before a 2:1 split would be reported as
#'   0.1 by Yahoo (assuming no subsequent splits). This function expects the 0.2. See 
#'   \code{\link{unadjust}}.
#' @param price_data The historical prices, usually closes, with no adjustments.
#' @param initial_rawclose Optional starting value to measure returns against. Default is
#' NULL, i.e., just use the first ticks from the other data vectors. If used, the first 
#' dividend will be included.
#' 
#' @return A numeric vector the same length as the inputs with the arithmetic return from
#' the first close including dividends.
#' 
#' @export
make_raw_return.default <- function(price_data, split_adjusted_shares, split_unadjusted_dividends,
                                initial_rawclose = NULL, ...) {
  rawclose <- split_adjusted_shares * price_data
  if( is.null(initial_rawclose) ) {
    initial_rawclose <- rawclose[1]
    paid_dividends <- function(x) c(0, x[-1])
  } else {
    paid_dividends <- function(x) x
  }
  (rawclose + cumsum(split_adjusted_shares * paid_dividends(split_unadjusted_dividends) ) - 
     initial_rawclose)/ initial_rawclose
}

#' @describeIn make_raw_return
#' 
#' Compute period returns as value change plus dividends.
#' 
#' @param price_data_xts An \code{xts} object with columns \code{Close, rawshares, rawdividend}
#' or with a symbol \code{SYM}, columns \code{SYM.Close, SYM.Rawshares, SYM.Rawdividend}.
#' @param period A string indicating a granularity. See \code{\link[quantmod]{periodReturn}}.
#' 
#' @return An \code{xts} object ordered by the \code{index} column granulated to period. The
#' returns are calculated as close to close values multiplied by rawshares to account for
#' splits and adding in dividends over the period. Note that a dividend on date t in Yahoo 
#' data is only paid to those holding at the close on date t-1. Therefore, there a dividend
#' on date t for which t is also the period start will be recorded in the period ending on date
#' t, not the next period starting on t. In particular, daily returns include a dividend on
#' date t in the return from t-1 to t, recorded as the return on date t.
#' 
#' Note that daily raw returns for one week do not simply convert to weekly returns as prod(1+r).
#' Each day the basis for return is the close, but the return comes from the following close
#' plus dividends. Those dividends are not included in the basis for the following day.
#' 
#' @export
make_raw_return.xts <- function(price_data_xts, period = 'monthly', ...) {
  index <- NULL
  period_opts <- list(daily = "days", weekly = "weeks", monthly = "months", 
                      quarterly = "quarters", yearly = "years", annually = "years")
  end_points <- xts::endpoints( index(price_data_xts), 
                                on = period_opts[[period]])
  end_points <- end_points[-1] #drop initial 0
  if( ! 1 %in% end_points ) end_points <- c(1, end_points)
  n <- length(end_points)
  price_data_dt <- as.data.table(price_data_xts)
  if( needs_symbol(price_data_dt) ) {
    price_data_dt <- gather_symbol(price_data_dt)
  }
  raw_ret_dt <- make_raw_return( price_data_xts, 
                       start = end_points[-n],
                       end = end_points[-1], ... )
  if ( period == 'daily' ) {
    initial_zero_to_match_quantmod <- data.table(index = index(price_data_xts[1,]),
                                                 rawreturn = 0)
    raw_ret <- xts::as.xts( rbind( initial_zero_to_match_quantmod,
                              raw_ret_dt ) )
  } else {
    # dt version gives intermediate returns between start and end
    # to match quantmod, only return the values at endpoints
    # end_points[-1]: remove initial starting point
    # -1: indexing is off by 1 because make_raw_return will not have a return on the
    # first tick
    raw_ret <- xts::as.xts( raw_ret_dt[end_points[-1]-1,] ) 
  }
  colnames(raw_ret) <- paste(period, "rawreturn", sep = "_")
  raw_ret
} 

#' @describeIn make_raw_return
#' 
#' Compute raw return between start and end.
#' 
#' @param price_data_dt A \code{data.table} of price data as produced by \code{make_raw_data}. 
#'   In particular, it must have columns \code{close, rawshares, rawdividend}.
#' @param start,end Index vectors (numeric or logical) of starting/ending points. 
#'     They should be the same length. If logical, they should have the same number
#'     of TRUE values.
#' @return A \code{data.table} of raw returns with columns index and rawreturn. 
#' If the (start,end] ranges do not overlap, the returns are a running calculation from
#' each start indexed from start+1 to end. If there the (start,end] ranges do not partition
#' the index of the input, the missing indexes are omitted (especially useful when looking
#' at trade returns in which you are not always in the market). If there is overlap in
#' the (start,end] ranges, there is just one return calculated for each end indexed by the 
#' index at end.
#' 
#' @export
make_raw_return.data.table <- function(price_data_dt, start = 1, end = nrow(price_data_dt), ...) {
  n <- nrow(price_data_dt)
  period_index <- index <- on_period <- Close <- rawshares <-
    rawdividend <- rawreturn <- NULL
  if( xts::timeBased(end) ) {
    end_index <- price_data_dt[, which(as.IDate(index) %in% as.IDate(end)) ]
  } else {
    end_index <- end
  }
  if( xts::timeBased(start) ) {
    start_index <- price_data_dt[, which(as.IDate(index) %in% as.IDate(start)) ]
  } else {
    start_index <- start
  }
  n_intervals <- length(start_index)

  price_data_dt[, period_index := 0]
  price_data_dt[end_index, period_index := 1]
  price_data_dt[start_index, period_index := period_index + 1]
  price_data_dt[, period_index := c(0, cumsum( period_index)[-n]) ]
  price_data_dt[, on_period := period_index %in% period_index[end_index]]

  initial_rawclose <- price_data_dt[start_index, Close * rawshares]
  overlap <- any( start_index[-1] < end_index[-n_intervals])
  if( overlap ) {
    overlapped_dividends <- numeric(length(initial_rawclose))  
    for( idx in seq_along(start_index)) {
      overlapped_dividends[idx] <- price_data_dt[(start_index[idx]+1):end_index[idx],
                                                   sum(rawdividend * rawshares / last(rawshares) )]
    }
    raw_ret <- price_data_dt[(on_period),  
                          list(rawreturn = make_raw_return(last(Close),
                                                           last(rawshares), 
                                                           overlapped_dividends[.GRP], 
                                                           initial_rawclose[.GRP],...) ), 
                          by = period_index]
  } else {
    raw_ret <- price_data_dt[(on_period), 
                               list(rawreturn = make_raw_return(Close,
                                                                rawshares, 
                                                                rawdividend, 
                                                                initial_rawclose[.GRP],...)), 
                             by = period_index]
  }
  ret <- data.table( index = price_data_dt[(on_period), index],
              rawreturn = raw_ret[,rawreturn])
  price_data_dt[, period_index := NULL]
  price_data_dt[, on_period := NULL]
  ret
  # Dividends are paid to the holder on the previous day in Yahoo data.
  # Therefore, they need to be offset by 1 as to which period they are
  # paid in. A dividend on the initial day should not be included because
  # it would be paid in the previous period. If the initial period is partial,
  # I suppose it should be included, but currently it's not. The first period
  # isn't perfect anyway because we don't have the true starting price.


}

#' Compute shares held based on splits and reinvested dividends.
#' 
#' For a sequence of closing prices, split adjusted shares, and split unadjusted dividends,
#' (i.e., close, raw_shares, and raw_dividends), compute the number or shares held if the
#' dividends are reinvested.
#' 
#' This function assumes that a dividend at tick t is paid to the stock owners at tick t-1 (i.e.,
#' t is the ex-date in accordance with how Yahoo reports dividends). Therefore, a dividend
#' on the initial tick is assumed not to be received.
#' 
#' Per the ordinary dividends received on mutual funds in my TD Ameritrade account, the
#' reinvestment of a dividend on date t is done at a trade price of the close on date t-1.
#' This may be a bad assumption in other cases. And in yet other cases, it is not 
#' practical to reinvest your dividends at all (stocks in my TD Ameritrade account). If 
#' your account is big enough to reinvest dividends, this function may need to be
#' modified to match your reinvestment procedure. Also for large accounts, you may
#' need to find share buyback data to accurately reflect other ways cash will be returned
#' to you (not handled in this function).
#' 
#' @param close A numeric vector of closing prices, not adjusted for splits or dividends.
#' @param split_adjusted_shares A numeric vector of shares held. The shares should already
#' account for splits (e.g., if you start with 1 share and there is a 2:1 split, you should
#' have 2 shares after the split).
#' @param unadjusted_dividends A numeric vector of dividends paid. These should not be adjusted
#' for splits. Yahoo adjusts their dividends for splits. The make_raw_value function can create
#' raw dividends (i.e., undo the split adjustment).
#' 
#' @return A numeric vector of shares after splits and reinvestment of dividends.
#' 
#' @export
make_reinvested_shares <- function(close, split_adjusted_shares, unadjusted_dividends) {
  split_adjusted_shares*cumprod(1+unadjusted_dividends/c(Inf,close[-length(close)]))
}

#' Compute period returns assuming reinvestment of dividends.
#' 
#' @param price_data Price data on which to compute returns
#' @param ... Additional arguments to pass to specializations.
#' 
#' @return Return data calculated as close to close values multiplied by rawshares to account for
#' splits and adding in dividends over the period. Note that a dividend on date t in Yahoo 
#' data is only paid to those holding at the close on date t-1. Therefore, there a dividend
#' on date t for which t is also the period start will be recorded in the period ending on date
#' t, not the next period starting on t. In particular, daily returns include a dividend on
#' date t in the return from t-1 to t, recorded as the return on date t.
#' 
#' Note that daily raw returns for one week do not simply convert to weekly returns as prod(1+r).
#' Each day the basis for return is the close, but the return comes from the following close
#' plus dividends. Those dividends are not included in the basis for the following day.
#' 
#' @export
make_reinvested_return <- function(price_data, ...) UseMethod("make_reinvested_return")

#' Compute reinvested return from split adjusted shares, dividends, and closing prices.
#' 
#' Computes the return of buying the given number of shares at the first closing price
#' and exiting at any intermediate closing price including splits and dividends. The
#' number of shares will change based on splits and reinvesting dividends; 
#' no accounting is done to rebase
#' the return for other purchases and sales of shares. The dividends must be as they would
#' historically have been reported, not split adjusted as Yahoo provides. A dividend on 
#' the first tick is not included in the return as Yahoo reports them on the ex-date. 
#' 
#' @param price_data The historical price data with no adjustments.
#' @param split_adjusted_shares Numeric vector of number of shares accounting
#'   for share splits. Typically, starts with 1 and adjusts for each split to reflect what
#'   that 1 share has become.
#' @param unadjusted_dividends Numeric vector of dividends as they would 
#'   have been announced historically. The values returned from Yahoo are split adjusted. 
#'   For example, a dividend of 0.2 that occured before a 2:1 split would be reported as
#'   0.1 by Yahoo (assuming no subsequent splits). This function expects the 0.2. Available
#'   as \code{rawdividend} as returned by \code{make_raw_value}.
#' @param initial_price Optional starting price to measure returns against. Default is
#' NULL, i.e., just use the first tick from the \code{price_data} vector. If used, the first 
#' dividend will be included and initial_shares must be provided. 
#' @param initial_shares Optional starting shares to measure returns against. Default is NULL,
#' i.e., just use the first tick in the split_adjusted_shares vector. If used, the 
#' initial_close must be provided.
#' @param ... Additional arguments currently unused.
#' 
#' @return A numeric vector the same length as the inputs with the arithmetic return from
#' the first close assuming dividends are reinvested in shares per make_reinvested_shares.
#' 
#' @export
make_reinvested_return.default <- function(price_data, split_adjusted_shares, unadjusted_dividends,
                                    initial_price = NULL, initial_shares = NULL, ...) {
  if( is.null(initial_price) && is.null(initial_shares) ) {
    reinvested_shares <- make_reinvested_shares( price_data, split_adjusted_shares, unadjusted_dividends)
    initial_value <- reinvested_shares[1] * price_data[1]
  } else {
    reinvested_shares <- make_reinvested_shares( c(initial_price, price_data), 
                                                 c(initial_shares, split_adjusted_shares), 
                                                 c(0, unadjusted_dividends) )[-1]
    initial_value <- initial_shares * initial_price
  }
  (price_data*reinvested_shares - initial_value ) / initial_value
}

#' Compute reinvested return between start and end.
#' 
#' @param price_data A data.frame of prices as produced by make_raw_data. In particular,
#' requires columns index, close, rawshares, and rawdividend.
#' @param start An index vector (numeric or logical) of starting points.
#' @param end An index vector (numeric or logical) of ending poitns.
#' @param ... Additional arguments.
#' @return A data.frame of reinvested returns with columns index and reinvested_return. 
#' If the (start,end] ranges do not overlap, the returns are a running calculation from
#' each start indexed from start+1 to end. If there the (start,end] ranges do not partition
#' the index of the input, the missing indexes are omitted (especially useful when looking
#' at trade returns in which you are not always in the market). If there is overlap in
#' the (start,end] ranges, there is just one return calculated for each end indexed by the 
#' index at end.
#' 
#' @export
make_reinvested_return.data.table <- function(price_data, start = 1, end = nrow(price_data), ...) {
  n <- nrow(price_data)
  index <- period_index <- on_period <- Close <- rawshares <-
    rawdividend <- reinvested_return <- NULL
  if( xts::timeBased(end) ) {
    end_index <- price_data[, which(as.IDate(index) %in% as.IDate(end)) ]
  } else {
    end_index <- end
  }
  if( xts::timeBased(start) ) {
    start_index <- price_data[, which(as.IDate(index) %in% as.IDate(start)) ]
  } else {
    start_index <- start
  }
  n_intervals <- length(start_index)  
  
  price_data[, period_index := 0]
  price_data[end_index, period_index := 1]
  price_data[start_index, period_index := period_index + 1]
  price_data[, period_index := c(0, cumsum( period_index)[-n]) ]
  price_data[, on_period := period_index %in% period_index[end_index]]
  
  initial_close <- price_data[start_index, Close]
  initial_shares <- price_data[start_index, rawshares]
  overlap <- any( start_index[-1] < end_index[-n_intervals])
  if( overlap ) {
#     overlapped_shares <- numeric(length(initial_rawclose))  
    overlapped_dividend <- numeric(length(initial_close))  
    for( idx in seq_along(start_index)) {
#       overlapped_shares[idx] <- price_data[start_index[idx]:end_index[idx],
#                                            make_reinvested_shares( close, rawshares, rawdividend ) ]
      overlapped_dividend[idx] <- price_data[start_index[idx]:end_index[idx],
               ( last( make_reinvested_shares( Close, rawshares, rawdividend) ) /
                  last(rawshares) - 1 ) *
                 initial_close[idx] ]
    }
#     overlapped_dividend <- overlapped_shares
    ret_data <- price_data[(on_period),  list(reinvested_return = 
                                                make_reinvested_return(last(Close),
                                                                       last(rawshares), 
                                                                       overlapped_dividend[.GRP],
                                                                       initial_close[.GRP],
                                                                       initial_shares[.GRP],...) ), 
                              by = period_index]
  } else {
    ret_data <- price_data[(on_period), 
                              list(reinvested_return = make_reinvested_return(Close, 
                                                                              rawshares, 
                                                                              rawdividend, 
                                                                              initial_close[.GRP],
                                                                              initial_shares[.GRP],...)), 
                              by = period_index]
  }
  ret <- data.table( index = price_data[(on_period), index],
                     reinvested_return = ret_data[,reinvested_return])
  price_data[, period_index := NULL]
  price_data[, on_period := NULL]
  ret
  # Dividends are paid to the holder on the previous day in Yahoo data.
  # Therefore, they need to be offset by 1 as to which period they are
  # paid in. A dividend on the initial day should not be included because
  # it would be paid in the previous period. If the initial period is partial,
  # I suppose it should be included, but currently it's not. The first period
  # isn't perfect anyway because we don't have the true starting price.
  
  
}

#' Compute period returns by reinvesting within period dividends.
#' 
#' @param price_data An \code{xts} object with columns \code{Close, rawshares, rawdividend}
#' or with a symbol \code{SYM}, columns \code{SYM.Close, SYM.Rawshares, SYM.Rawdividend}.
#' @param period A string indicating a granularity. See quantmod::periodReturn.
#' @param ... Additional arguments.
#' 
#' @return An \code{xts} object ordered by the \code{index} column granulated to period. The
#' returns are calculated as close to close values multiplied by rawshares to account for
#' splits and adding in dividends over the period. Note that a dividend on date t in Yahoo 
#' data is only paid to those holding at the close on date t-1. Therefore, there a dividend
#' on date t for which t is also the period start will be recorded in the period ending on date
#' t, not the next period starting on t. In particular, daily returns include a dividend on
#' date t in the return from t-1 to t, recorded as the return on date t.
#' 
#' Note that daily raw returns for one week do not simply convert to weekly returns as prod(1+r).
#' Each day the basis for return is the close, but the return comes from the following close
#' plus dividends. Those dividends are not included in the basis for the following day.
#' 
#' @export
make_reinvested_return.xts <- function(price_data, period = 'monthly', ...) {
  index <- NULL
  period_opts <- list(daily = "days", weekly = "weeks", monthly = "months", 
                      quarterly = "quarters", yearly = "years", annually = "years")
  end_points <- xts::endpoints( index(price_data), 
                                on = period_opts[[period]])
  end_points <- end_points[-1] #drop initial 0
  if( ! 1 %in% end_points ) end_points <- c(1, end_points)
  n <- length(end_points)
  price_dt <- as.data.table(price_data)
  if( needs_symbol(price_dt) ) {
    price_dt <- gather_symbol(price_dt)
  }
  reinvested_ret_dt <- make_reinvested_return( price_dt, 
                                 start = end_points[-n],
                                 end = end_points[-1], ... )
  if ( period == 'daily' ) {
    initial_zero_to_match_quantmod <- data.table(index = index(price_data[1,]),
                                                 reinvested_return = 0)
    reinvested_ret <- xts::as.xts( rbind( initial_zero_to_match_quantmod,
                              reinvested_ret_dt ) )
  } else {
    # dt version gives intermediate returns between start and end
    # to match quantmod, only return the values at endpoints
    # end_points[-1]: remove initial starting point
    # -1: indexing is off by 1 because make_reinvested_return will not have a return on the
    # first tick
    reinvested_ret <- xts::as.xts( reinvested_ret_dt[end_points[-1]-1,] ) 
  }
  colnames(reinvested_ret) <- paste(period, "reinvested_return", sep = "_")
  reinvested_ret
}
e-mu-pi/adjustr documentation built on Sept. 26, 2024, 9:34 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
e-mu-pi/adjustr
Adjust Stock Price Data for Splits and Dividends

R/adjustment.R
In e-mu-pi/adjustr: Adjust Stock Price Data for Splits and Dividends

R Package Documentation

Browse R Packages

We want your feedback!

e-mu-pi/adjustr Adjust Stock Price Data for Splits and Dividends

R/adjustment.R In e-mu-pi/adjustr: Adjust Stock Price Data for Splits and Dividends

R Package Documentation

Browse R Packages

We want your feedback!

e-mu-pi/adjustr
Adjust Stock Price Data for Splits and Dividends

R/adjustment.R
In e-mu-pi/adjustr: Adjust Stock Price Data for Splits and Dividends