#' Preprocess LM jump test result
#'
#' Convenience functionality for post-processing the jump test output:
#' subsetting by date, changing the significance level, applying a Bonferroni
#' correction, choosing the variation measure used in the test statistic, and
#' optionally splitting the detected jumps into jump waves.
#'
#' The input is copied first, so `DATA` itself is not modified.
#'
#' @import data.table
#'
#' @param DATA The output from `LM_JumpTest_2012` (a `data.table`). The code
#'   reads the columns `date`, `t`, `id`, `s`, `A_n`, `B_n`, `M`, `L_t_j` and
#'   `Vn` (or `plim_Vn` if `asymptotic_variation = TRUE`).
#' @param subs_date A vector with dates can be provided to subset from.
#'   Compared as character against `date`. Defaults to NA (no subsetting).
#' @param sign_level The significance level for detecting jumps. Defaults to
#'   0.01.
#' @param split_waves Should the data be split into jump waves? Defaults to
#'   FALSE.
#' @param wave_dist If `split_waves = TRUE`, where to set the splits? A new
#'   wave starts whenever the gap between two consecutive jumps, measured in
#'   seconds, exceeds `wave_dist`. Must be supplied when `split_waves = TRUE`.
#'   Defaults to NA.
#' @param wave_size Lower threshold of jumps needed to be in a wave for it to
#'   be considered? Defaults to 5, i.e. only waves with at least 5 jumps will
#'   be kept in the end. Set to 1 for all jumps to be included.
#' @param bonferroni Should a Bonferroni correction be performed? Defaults to
#'   TRUE. The significance level is divided by the number of observations per
#'   `date`/`id`/`s` (and `hour` if `hourly = TRUE`).
#' @param DIFF_ID Should the data be evaluated by exchange and symbol or just
#'   by exchange? Defaults to FALSE. NOTE(review): this argument is currently
#'   not used by any computation in this function; it is kept so existing
#'   callers keep working.
#' @param asymptotic_variation Should the asymptotic variation be used for
#'   calculating the test statistic? Defaults to FALSE, s.t. the empirical
#'   variation is used.
#' @param hourly Is the test statistic calculated hourly? This can be useful
#'   for very high frequency data. Defaults to FALSE.
#'
#' @return A `data.table` keyed by `t` that contains only the rows flagged as
#'   jumps, with the added columns `sign_level`, `betastar`, `criticalvalue`,
#'   `Chi_t_j` and `Jump_indicator` (-1 / 1), plus `n_obs_sample` when
#'   `bonferroni = TRUE`. With `split_waves = TRUE` the columns `wave_ID`
#'   (first column), `obs_N`, `t_diff`, `id_s`, `rank` and `index` are added
#'   and only waves with at least `wave_size` jumps are kept. If no jump is
#'   detected at all, an empty `data.table()` is returned.
#' @export
preprocess_jump_data <- function(DATA,
                                 subs_date = NA,
                                 sign_level = 0.01,
                                 split_waves = FALSE,
                                 wave_dist = NA,
                                 wave_size = 5,
                                 bonferroni = TRUE,
                                 DIFF_ID = FALSE,
                                 asymptotic_variation = FALSE,
                                 hourly = FALSE) {
  ## work on a copy so the caller's table is never modified by reference ##
  jumps <- copy(DATA)
  if (!is.character(jumps[["date"]])) jumps[, date := as.character(date)]

  ## optional date subset ##
  # `subs_date` may be a vector, so test it with all(); subset the converted
  # copy (not DATA) so the character `date` column is what gets matched.
  if (!is.null(subs_date) && !all(is.na(subs_date))) {
    keep_dates <- as.character(subs_date[!is.na(subs_date)])
    jumps <- jumps[date %in% keep_dates]
  }
  if (!"hour" %in% names(jumps)) jumps[, hour := hour(t)] # if no h available
  setkey(jumps, t)

  ## Adjust sign. level + Bonferroni correction ##
  # set() evaluates `value` in this function's scope, so the argument is used
  # even if DATA already carries a `sign_level` column.
  set(jumps, j = "sign_level", value = sign_level)
  if (isTRUE(bonferroni)) {
    bonf_by <- if (isTRUE(hourly)) {
      c("date", "hour", "id", "s")
    } else {
      c("date", "id", "s")
    }
    jumps[, n_obs_sample := .N, by = bonf_by]
    jumps[, sign_level := sign_level / n_obs_sample] # bonferroni correction
  }

  ## critical value and test statistic ##
  jumps[, betastar := -log(-log(1 - sign_level))]
  jumps[, criticalvalue := betastar * B_n + A_n]
  if (isTRUE(asymptotic_variation)) {
    jumps[, Chi_t_j := sqrt(M) / sqrt(plim_Vn) * L_t_j]
  } else {
    jumps[, Chi_t_j := sqrt(M) / sqrt(Vn) * L_t_j]
  }

  ## signed jump indicator (-1 / 0 / 1); keep only the detected jumps ##
  jumps[, Jump_indicator := as.numeric(abs(Chi_t_j) > criticalvalue) * sign(Chi_t_j)]
  jumps <- jumps[Jump_indicator != 0]
  if (nrow(jumps) == 0) {
    message("No jumps detected")
    return(data.table())
  }

  if (isTRUE(split_waves)) {
    if (length(wave_dist) != 1L || is.na(wave_dist)) {
      stop("`wave_dist` must be a single non-missing number when `split_waves = TRUE`",
           call. = FALSE)
    }
    ## make a separation between jump waves ##
    # observation counter and time since the previous observed jump
    jumps[, obs_N := seq_len(.N)]
    jumps[, t_diff := as.difftime(t - shift(t, type = "lag"))]
    jumps[, id_s := paste(id, s, sep = "_")]

    # `t_diff` gets automatically chosen units (secs, mins, hours, ...), so
    # convert explicitly to seconds before comparing against `wave_dist`.
    gap_secs <- as.numeric(jumps[["t_diff"]], units = "secs")
    # a new wave starts at every row whose gap to the previous jump is too
    # large; the first row (gap NA) always belongs to the first wave.
    starts_wave <- !is.na(gap_secs) & gap_secs > wave_dist
    set(jumps, j = "wave_ID", value = cumsum(starts_wave) + 1L)

    # per wave: rank of the distinct timestamps (rows are sorted by `t`, so
    # run ids equal group ids) and a running row index
    jumps[, c("rank", "index") := list(rleid(t), seq_len(.N)), by = wave_ID]

    # keep only waves with at least `wave_size` jumps
    wave_counts <- jumps[, .N, by = wave_ID]
    kept_waves <- wave_counts[N >= wave_size][["wave_ID"]]
    jumps <- jumps[wave_ID %in% kept_waves]

    # renumber the surviving waves consecutively and put the id first
    jumps[, wave_ID := rleid(wave_ID)]
    setcolorder(jumps, "wave_ID")
  }

  setkey(jumps, t)
  jumps
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.