Nothing
## Copyright (C) 2015,2016 Philip Stubbings <phil@parasec.net>
## Licensed under the GPL v2 license. See LICENSE.md for full terms.
##' Calculate priceLevelVolume (depth).
##'
##' Given a data.frame of limit order events, this function will calculate
##' the cumulative volume for each price level over time. Changes in volume
##' at a price level can occur when an a new order is added to the queue,
##' updated (partial fill) or deleted (execution or removal). The resulting
##' time series is of the form:
##'
##' \describe{
##' \item{timestamp}{local time at which volume changed}
##' \item{price}{price level at which volume changed}
##' \item{volume}{amount of volume now remaining at this price level}
##' \item{side}{the current side of the price level in the order book}
##' }
##'
##' @param events Limit order events.
##' @return Time series of liquidity for each price level in the order book.
##' @author phil
##' @keywords internal
priceLevelVolume <- function(events) {
directionalPriceLevelVolume <- function(dir.events) {
cols <- c("event.id", "id", "timestamp", "exchange.timestamp", "price",
"volume", "direction", "action")
added.volume <- dir.events[(dir.events$action == "created"
| (dir.events$action == "changed" & dir.events$fill == 0))
& dir.events$type != "pacman" & dir.events$type != "market", cols]
cancelled.volume <- dir.events[(dir.events$action == "deleted"
& dir.events$volume > 0) & dir.events$type != "pacman"
& dir.events$type != "market", cols]
cancelled.volume$volume <- -cancelled.volume$volume
# remove deletes with no previous add.
cancelled.volume <- cancelled.volume[cancelled.volume$id %in%
added.volume$id, ]
filled.volume <- dir.events[dir.events$fill > 0
& dir.events$type != "pacman"
& dir.events$type != "market",
c("event.id", "id", "timestamp", "exchange.timestamp", "price", "fill",
"direction", "action")]
filled.volume$fill <- -filled.volume$fill
# remove fills with no previous add.
filled.volume <- filled.volume[filled.volume$id %in% added.volume$id, ]
colnames(filled.volume) <- cols
volume.deltas <- rbind(added.volume, cancelled.volume, filled.volume)
volume.deltas <- volume.deltas[order(volume.deltas$price,
volume.deltas$timestamp), ]
# ^^-- so price level deltas are now in order and order life-cycles can
# overlap..
cum.volume <- unlist(tapply(volume.deltas$volume, volume.deltas$price,
function(volume) cumsum(volume)), use.names=F)
# this can happen with missing data...
cum.volume <- ifelse(cum.volume < 0, 0, cum.volume)
cbind(volume.deltas[, c("timestamp", "price")], volume=cum.volume,
side=volume.deltas$direction)
}
bids <- events[events$direction == "bid", ]
depth.bid <- directionalPriceLevelVolume(bids)
asks <- events[events$direction == "ask", ]
depth.ask <- directionalPriceLevelVolume(asks)
depth.data <- rbind(depth.bid, depth.ask)
depth.data[order(depth.data$timestamp), ]
}
##' Filter price level volume.
##'
##' Given depth data calculated by \code{\link{priceLevelVolume}}, filter
##' between a specified time range. The resulting data will contain price level
##' volume which is active only within the specified time range.
##'
##' For price levels with volume > 0 before the time range starts, timestamps
##' will be set to the supplied \code{from} parameter.
##'
##' For volume > 0 after the time range ends, timestamps will be set to the
##' supplied \code{to} parameter and volume set to 0.
##'
##' For example, the following data taken from \code{\link{priceLevelVolume}}
##' for price level 243.29 shows the available volume through time at that
##' price level between \code{00:52:37.686} and \code{03:28:49.621}.
##'
##' \tabular{rrrr}{
##' timestamp \tab price \tab volume \tab side \cr
##' 2015-05-01 00:52:37.686 \tab 243.29 \tab 911500000 \tab ask \cr
##; 2015-05-01 01:00:33.111 \tab 243.29 \tab 0 \tab ask \cr
##' 2015-05-01 01:00:36.243 \tab 243.29 \tab 862200000 \tab ask \cr
##' 2015-05-01 02:45:43.052 \tab 243.29 \tab 0 \tab ask \cr
##' 2015-05-01 02:52:24.063 \tab 243.29 \tab 614700000 \tab ask \cr
##' 2015-05-01 02:52:51.413 \tab 243.29 \tab 0 \tab ask \cr
##' 2015-05-01 02:53:13.904 \tab 243.29 \tab 952300000 \tab ask \cr
##' 2015-05-01 03:28:49.621 \tab 243.29 \tab 0 \tab ask}
##'
##' applying \code{filterDepth} to this data for a time range beteen
##' \code{02:45} and \code{03:00} will result in the following:
##'
##' \tabular{rrrr}{
##' timestamp \tab price \tab volume \tab side \cr
##' 2015-05-01 02:45:00.000 \tab 243.29 \tab 862200000 \tab ask \cr
##' 2015-05-01 02:45:43.052 \tab 243.29 \tab 0 \tab ask \cr
##' 2015-05-01 02:52:24.063 \tab 243.29 \tab 614700000 \tab ask \cr
##' 2015-05-01 02:52:51.413 \tab 243.29 \tab 0 \tab ask \cr
##' 2015-05-01 02:53:13.904 \tab 243.29 \tab 952300000 \tab ask \cr
##' 2015-05-01 03:00:00.000 \tab 243.29 \tab 0 \tab ask}
##'
##' Note that the timestamps at the begining and end of the table have been
##' \emph{clamped} to the specified range and the volume set to 0 at the end.
##'
##' @param d \code{\link{depth}} data.
##' @param from Beginning of range.
##' @param to End of range.
##' @return Filtered depth data.
##' @author phil
##' @examples
##'
##' # obtain price level volume for a 15 minute window.
##' filtered <- with(lob.data, filterDepth(depth,
##' from=as.POSIXct("2015-05-01 02:45:00.000", tz="UTC"),
##' to=as.POSIXct("2015-05-01 03:00:00.000", tz="UTC")))
##'
##' # top 5 most active price levels during this 15 minute window.
##' head(sort(tapply(filtered$volume, filtered$price, length),
##' decreasing=TRUE), 5)
##'
##' # extract available volume for price level 233.78, then plot it.
##' level.233.78 <- filtered[filtered$price == 233.78, c("timestamp", "volume")]
##' plotTimeSeries(level.233.78$timestamp, level.233.78$volume*10^-8)
##'
##' @export filterDepth
filterDepth <- function(d, from, to) {
# 1. get all active price levels before start of range.
pre <- d[d$timestamp <= from, ]
pre <- pre[order(pre$price, pre$timestamp), ]
# last update for each price level <= from. this becomes the starting point
# for all updates within the range.
pre <- pre[!duplicated(pre$price, fromLast=T) & pre$volume > 0, ]
# clamp range (reset timestamp to from if price level active before start of
# range.
if(nrow(pre) > 0) {
pre$timestamp <- as.POSIXct(sapply(pre$timestamp, function(r) {
max(from, r)
}), origin="1970-01-01", tz="UTC")
}
# 2. add all volume change within the range.
mid <- d[d$timestamp > from & d$timestamp < to, ]
range <- rbind(pre, mid)
# 3. at the end of the range, set all price level volume to 0.
open.ends <- data.frame(timestamp=to,
range[(!duplicated(range$price, fromLast=T)) & range$volume > 0, -1])
open.ends$volume <- 0
# combine pre, mid and open.ends. ensure it is in order.
range <- rbind(range, open.ends)
range <- range[order(range$price, range$timestamp), ]
range
}
##' Calculate order book summary statistics/metrics.
##'
##' This function calculates various summary statistics describing the state of
##' the limit order book after every event. The metrics are intended to quantify
##' the "shape" of the order book through time. Currenly the following metrics
##' are calculated:
##'
##' \preformatted{
##' [timestamp,
##' best.bid.price, best.bid.vol, bid.vol25:500bps,
##' best.ask.price, best.ask.vol, ask.vol25:500bps,]
##'
##' where timestamp = time of order book state change
##' best.bid.price = current best bid price
##' best.bid.vol = current amount of volume at the best bid
##' bid.vol25:500bps = amount of volume available > -25bps and <= best bid
##' until > 500bps <= 475bps.
##' ... the same pattern is then repeated for the ask side.
##' }
##'
##' @param depth Price level cumulative depth calculated by priceLevelVolume()
##' @param bps Width (in BPS) for each interval/bin
##' @param bins Number of intervals +- the best bid/ask to aggregate.
##' @return data.frame containing order book summary statistics.
##' @author phil
##' @keywords internal
depthMetrics <- function(depth, bps=25, bins=20) {
pctNames <- function(name) paste0(name, seq(bps, bps*bins, bps), "bps")
ordered.depth <- depth[order(depth$timestamp), ]
ordered.depth$price <- as.integer(round(100*ordered.depth$price))
depth.matrix <- cbind(ordered.depth$price, ordered.depth$volume,
ifelse(ordered.depth$side == "bid", 0, 1))
metrics <- matrix(0, ncol=2*(2+bins), nrow=nrow(ordered.depth),
dimnames=list(1:nrow(ordered.depth),
c("best.bid.price", "best.bid.vol", pctNames("bid.vol"),
"best.ask.price", "best.ask.vol", pctNames("ask.vol"))))
# the volume state for all price level depths. (updated in loop)
asks.state <- integer(length=1000000)
asks.state[1000000] <- 1 # trick (so there is an initial best ask)
bids.state <- integer(length=1000000)
bids.state[1] <- 1 # trick
# initial best bid/ask
best.ask <- max(ordered.depth[ordered.depth$side == "ask", ]$price)
best.bid <- min(ordered.depth[ordered.depth$side == "bid", ]$price)
best.ask.vol <- 0
best.bid.vol <- 0
for(i in 1:(nrow(ordered.depth))) {
depth.row <- depth.matrix[i, ]
price <- depth.row[1]
volume <- depth.row[2]
side <- depth.row[3]
# ask
if(side > 0) {
if(price > best.bid) {
asks.state[price] <- volume
if(volume > 0) {
if(price < best.ask) {
best.ask <- price
best.ask.vol <- volume
} else if(price == best.ask) {
best.ask.vol <- volume
}
} else {
if(price == best.ask) {
best.ask <- head(which(asks.state > 0), 1)
best.ask.vol <- asks.state[best.ask]
}
}
# + bps*bins range
price.range <- best.ask:round((1+bps*bins*0.0001)*best.ask)
volume.range <- asks.state[price.range]
#levels = length(price.range)
#width = levels/bins
#breaks <- ceiling(seq(width, levels, by=width))
breaks <- ceiling(cumsum(rep(length(price.range)/bins, bins)))
metrics[i, bins+3] <- best.ask
metrics[i, bins+4] <- best.ask.vol
metrics[i, (bins+5):(2*(2+bins))] <- intervalSumBreaks(volume.range, breaks)
# copy last bid data (no need to re-calculate it)
if(i > 1) metrics[i, 1:(2+bins)] <- metrics[i - 1, 1:(2+bins)]
} else {
# copy last data (no change)
if(i > 1) metrics[i, ] <- metrics[i - 1, ]
}
} else {
if(price < best.ask) {
bids.state[price] <- volume
if(volume > 0) {
if(price > best.bid) {
best.bid <- price
} else if(price == best.bid) {
best.bid.vol <- volume
}
} else {
if(price == best.bid) {
best.bid <- tail(which(bids.state > 0), 1)
best.bid.vol <- bids.state[best.bid]
}
}
price.range <- best.bid:round((1-bps*bins*0.0001)*best.bid)
volume.range <- bids.state[price.range]
#levels = length(price.range)
#width = levels/bins
#breaks <- ceiling(seq(width, levels, by=width))
breaks <- ceiling(cumsum(rep(length(price.range)/bins, bins)))
metrics[i, 1] <- best.bid
metrics[i, 2] <- best.bid.vol
metrics[i, 3:(2+bins)] <- intervalSumBreaks(volume.range, breaks)
# copy last ask data (no need to re-calculate it)
if(i > 1) metrics[i, (bins+3):(2*(2+bins))] <- metrics[i - 1, (bins+3):(2*(2+bins))]
} else {
# copy last data (no change)
if(i > 1) metrics[i, ] <- metrics[i - 1, ]
}
}
}
# back into $
res <- cbind(timestamp=ordered.depth$timestamp, data.frame(metrics))
keys <- c("best.bid.price", "best.ask.price")
res[, keys] <- round(0.01*res[, keys], 2)
res
}
##' Get the spread.
##'
##' Extracts the spread from the depth summary, removing any points in which a
##' change to bid/ask price/volume did not occur.
##'
##' The spread (best bid and ask price) will change following a market order or
##' upon the addition/cancellation of a limit order at, or within, the range of
##' the current best bid/ask. A change to the spread that is \emph{not} the
##' result of a market order (an impact/market shock) is known as a
##' \emph{quote}.
##'
##' The following table shows a market spread betwen \code{05:03:22.546} and
##' \code{05:04:42.957}. During this time, the best ask price and volume changes
##' whilst the best bid price and volume remains static.
##'
##' \tabular{rrrrr}{
##' timestamp \tab bid.price \tab bid.vol \tab ask.price \tab ask.vol \cr
##' 05:03:22.546 \tab 235.45 \tab 16235931 \tab 235.72 \tab 39375160 \cr
##' 05:03:24.990 \tab 235.45 \tab 16235931 \tab 235.72 \tab 21211607 \cr
##' 05:03:25.450 \tab 235.45 \tab 16235931 \tab 235.71 \tab 39375160 \cr
##' 05:04:15.477 \tab 235.45 \tab 16235931 \tab 235.72 \tab 39058160 \cr
##' 05:04:16.670 \tab 235.45 \tab 16235931 \tab 235.71 \tab 39058160 \cr
##' 05:04:42.957 \tab 235.45 \tab 16235931 \tab 235.71 \tab 77019160}
##'
##' @param depth.summary \code{\link{depth.summary}} data.
##' @return Bid/Ask spread quote data.
##' @author phil
##' @examples
##'
##' # get the last 25 quotes (changes to the spread).
##' with(lob.data, tail(getSpread(depth.summary), 25))
##'
##' @export getSpread
getSpread <- function(depth.summary) {
spread <- depth.summary[, c("timestamp",
"best.bid.price", "best.bid.vol",
"best.ask.price", "best.ask.vol")]
changes <- (diff(spread$best.bid.price) != 0
| diff(spread$best.bid.vol) != 0
| diff(spread$best.ask.price) != 0
| diff(spread$best.ask.vol) != 0)
spread[c(T, changes), ]
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.