scripts/Q3_Empirical_Properties_of_Financial_Data.R

library(xts)
library(qrmdata)
library(qrmtools)
library(nortest) # for cvm.test()
library(ADGofTest) # for ad.test()
library(moments) # for agostino.test(), jarque.test()
library(mvtnorm)

#### Exploring_financial_time_series.R ####
### Stock prices ##

## Dow Jones stock price data
data("DJ_const")
str(DJ_const)

## We extract a time period and take the first 10 stocks
DJdata <- DJ_const['2006-12-29/2015-12-31', 1:10]

## Use plot for zoo objects to get multiple plots
plot.zoo(DJdata, xlab = "Time", main = "DJ (10 component series)")
X <- returns(DJdata) # or diff(log(DJdata))[-1,]
head(X)
plot.zoo(X, xlab = "Time", main = "Log-returns of 10 DJ component series")

## Aggregating log returns by summation for each column
## Weekly
X.w <- apply.weekly(X, FUN = colSums)
dim(X.w)
plot.zoo(X.w, type = "h", xlab = "Time", main = "Weekly log-returns") # 'h' = histogram = vertical lines only
## Monthly
X.m <- apply.monthly(X, FUN = colSums)
dim(X.m)
plot.zoo(X.m, type = "h", xlab = "Time", main = "Monthly log-returns")
## Quarterly
X.q <- apply.quarterly(X, FUN = colSums)
dim(X.q)
plot.zoo(X.q, type = "h", xlab = "Time", main = "Quarterly log-returns")

### Stock indexes ##

## Load stock index data
data("SP500") # S&P 500
data("FTSE") # FTSE
data("SMI") # SMI
plot.zoo(SP500, xlab = "Time", ylab = "S&P 500")

## Merge the three time series
all <- merge(SP500, FTSE, SMI)
nms <- c("S&P 500", "FTSE", "SMI")
colnames(all) <- nms
plot.zoo(all, xlab = "Time", main = "All")

## Merge and retain only days where all three time series are available
all.avail <- merge(SP500, FTSE, SMI, all = FALSE)
colnames(all.avail) <- nms
plot.zoo(all.avail, xlab = "Time", main = "All available")

## Compute returns
SP500.X <- returns(SP500)
FTSE.X  <- returns(FTSE)
SMI.X   <- returns(SMI)
X <- merge(SP500.X, FTSE.X, SMI.X, all = FALSE)
colnames(X) <- nms
plot.zoo(X, xlab = "Time", main = "Log-returns")
pairs(as.zoo(X), gap = 0, cex = 0.4)

## Aggregating
## By week
X.w <- apply.weekly(X, FUN = colSums)
plot.zoo(X.w, xlab = "Time", main = "Weekly log-returns", type = "h")
## By month
X.m <- apply.monthly(X, FUN = colSums)
plot.zoo(X.m, xlab = "Time", main = "Monthly log-returns", type = "h")


### Exchange rates ##

## Load exchange rate data
data("GBP_USD") # 1 GBP in USD
data("EUR_USD") # 1 EUR in USD
data("JPY_USD") # 1 JPY in USD
data("CHF_USD") # 1 CHF in USD
FX <- merge(GBP_USD, EUR_USD, JPY_USD, CHF_USD)
head(FX)
plot.zoo(FX, xlab = "Time", main = "Exchange rates to USD")
X <- returns(FX)
plot.zoo(X, xlab = "Time", main = "Log-returns of exchange rates to USD",
         col = c("black", "royalblue3", "maroon3", "darkorange2"))


#### Zero-coupon bond yields ##

## Load zero-coupon bond yield data (in CAD)
data("ZCB_CA")
dim(ZCB_CA)
head(ZCB_CA)

## Change to percentages and select period
ZCB <- 100 * ZCB_CA['2002-01-02/2011-12-30']
cols <- c("royalblue3", "darkorange2", RColorBrewer::brewer.pal(8, name = "Dark2"))
plot.zoo(ZCB[,1:10], xlab = "Time", main = "Percentage yields (first 10 maturities)",
         col = cols)

## Compute differences (first row is removed)
X <- returns(ZCB, method = "diff")

## Pick 3 maturities
X3 <- X[, c(1, 8, 40)]
plot.zoo(X3, xlab = "Time", main = "Differences (3 maturities)", col = cols[c(9, 5, 7)])
pairs(as.zoo(X3), gap = 0, cex = 0.4)

#### Ljung-Box_correlation_tests ##
### 1 Constituent data ##

## Dow Jones constituent data
data("DJ_const")

## We extract a time period and take 29 of 30 DJ stocks
## (omit 'Visa' as it has a very short history in the index)
DJdata <- DJ_const['2006-12-29/2015-12-31',-which(names(DJ_const) == "V")]

## Use plot for zoo objects to get multiple plots
plot.zoo(DJdata, xlab = "Time", main = "DJ component series (without Visa)")

## Build log-returns and aggregate to obtain monthly log-returns
X <- returns(DJdata)
X.m <- apply.monthly(X, FUN = colSums)

### 2 Apply Ljung--Box tests ##
## Compute (lists of) Ljung--Box tests
LB.raw   <- apply(X,        2, Box.test, lag = 10, type = "Ljung-Box")
LB.abs   <- apply(abs(X),   2, Box.test, lag = 10, type = "Ljung-Box")
LB.raw.m <- apply(X.m,      2, Box.test, lag = 10, type = "Ljung-Box")
LB.abs.m <- apply(abs(X.m), 2, Box.test, lag = 10, type = "Ljung-Box")

#### Univariate_stylized_facts ####

## Univariate stylized facts:
## (U1) Return series are not iid although they show little serial correlation;
## (U2) Series of absolute (or squared) returns show profound serial correlation;
## (U3) Conditional expected returns are close to zero;
## (U4) Volatility (conditional standard deviation) appears to vary over time;
## (U5) Extreme returns appear in clusters;
## (U6) Return series are leptokurtic or heavy-tailed (power-like tail).

### 1 Index data ##

## S&P 500, FTSE, SMI
data("SP500")
data("FTSE")
data("SMI")

## Build log-returns
SP500.X <- returns(SP500)
FTSE.X <- returns(FTSE)
SMI.X <- returns(SMI)
## Remove zero returns (caused almost certainly by market closure)
SP500.X <- SP500.X[SP500.X != 0]
SMI.X <- SMI.X[SMI.X != 0]
FTSE.X <- FTSE.X[FTSE.X != 0]

## Merge and aggregate
X <- merge(SP500 = SP500.X, FTSE = FTSE.X, SMI = SMI.X, all = FALSE)
X.w <- apply.weekly(X, FUN = colSums) # weekly log-returns (summing within weeks)

### 2 (U1) and (U2) ##
## Auto- and crosscorrelations of returns and absolute returns
acf(X)
acf(abs(X))
acf(X.w)
acf(abs(X.w))

### 3 (U3) and (U4) ##

## Plot of returns
plot.zoo(X, xlab = "Time", main = "Log-returns") # this (partly) confirms (U3)
## Plot of volatilities
X.vols <- apply.monthly(X, FUN = function(x) apply(x, 2, sd)) # componentwise volatilities
plot.zoo(X.vols, xlab = "Time", main = "Volatility estimates")

### 3 (U5) ##

## Plot the 100 largest losses (already partly confirms (U5))
L <- -SP500.X # consider -log-returns of S&P 500 here
xtr.L <- L[rank(as.numeric(-L)) <= 100] # 100 smallest gains = largest losses
plot(as.numeric(xtr.L), type = "h", xlab = "Time", ylab = "100 largest losses")

## Now consider spacings (certainly not exponentially distributed)
spcs <- as.numeric(diff(time(xtr.L)))
qq_plot(spcs, FUN = qexp, method = "empirical")
if(FALSE) {
  ## This is equivalent to:
  qqplot(qexp(ppoints(length(spcs))), y = spcs, xlab = "Theoretical quantiles",
         ylab = "Sample quantiles")
  qqline(spcs, distribution = qexp)
}

## Compare with a Poisson process (simulated iid data)
L. <- xts(rt(length(L), df = 5), time(L)) # simulate from a t_5
xtr.L. <- L.[rank(as.numeric(-L.)) <= 100]
plot(as.numeric(xtr.L.), type = "h", xlab = "Time", ylab = "100 largest losses")
spcs. <- as.numeric(diff(time(xtr.L.)))
qq_plot(spcs., FUN = qexp, method = "empirical")


### 4 (U6) ##

## Show leptokurtosis (heavier tails than a normal) by Q-Q plots against a normal
layout(t(1:3)) # (1,3)-layout
for (i in 1:3)
  qq_plot(X.w[,i], FUN = qnorm, method = "empirical", main = names(X.w)[i])
layout(1) # restore layout

## Extract p-values
p.LB.raw   <- sapply(LB.raw,   `[[`, "p.value")
p.LB.abs   <- sapply(LB.abs,   `[[`, "p.value")
p.LB.raw.m <- sapply(LB.raw.m, `[[`, "p.value")
p.LB.abs.m <- sapply(LB.abs.m, `[[`, "p.value")
round(cbind(p.LB.raw, p.LB.abs, p.LB.raw.m, p.LB.abs.m), 2)

## Testing univariate normality ####

### 1 Generate data from N(mu, sig^2) and t_nu(mu, sig^2) ##
n <- 1000 # sample size
mu <- 1 # location
sig <- 2 # scale
nu <- 3 # degrees of freedom
set.seed(271) # set seed (for reproducibility)
X.N <- rnorm(n, mean = mu, sd = sig) # sample from N(mu, sig^2)
X.t <- mu + sig * rt(n, df = nu) * sqrt((nu-2)/nu) # sample from t_nu(mu, sqrt((nu-2)/nu)*sig^2) (same variance as N(mu, sig^2))
if(FALSE) {
  var(X.N)
  var(X.t)
  ## => quite apart (but closer for larger n)
}


### 2 Testing for N(mu, sig^2) ##
## We treat mu and sig^2 as unknown and estimate them
mu.N <- mean(X.N)
mu.t <- mean(X.t)
sig.N <- sd(X.N)
sig.t <- sd(X.t)

### 2.1 Tests for general distributions (here applied to the normal) ##
## Applied to normal data (with estimated mu and sig^2)
(ks <- ks.test(X.N, y = "pnorm", mean = mu.N, sd = sig.N)) # Kolmogorov--Smirnov
(cvm <- cvm.test(X.N)) # Cramer--von Mises for normality
(ad <- ad.test(X.N, distr.fun = pnorm, mean = mu.N, sd = sig.N)) # Anderson--Darling
stopifnot(min(ks$p.value, cvm$p.value, ad$p.value) >= 0.05) # => no rejections

## Applied to t data (with estimated mu and sig^2)
(ks <- ks.test(X.t, y = "pnorm", mean = mu.t, sd = sig.t)) # Kolmogorov--Smirnov
(cvm <- cvm.test(X.t)) # Cramer--von Mises for normality
(ad <- ad.test(X.t, distr.fun = pnorm, mean = mu.t, sd = sig.t)) # Anderson--Darling
stopifnot(max(ks$p.value, cvm$p.value, ad$p.value) < 0.05) # => all rejections based on 5%

### 2.2 Tests specifically for the normal distribution ##
## Applied to normal data
(sh <- shapiro.test(X.N)) # Shapiro--Wilk
(ag <- agostino.test(X.N)) # D'Agostino's test
(jb <- jarque.test(X.N)) # Jarque--Bera test
stopifnot(min(sh$p.value, ag$p.value, jb$p.value) >= 0.05) # => no rejections

## Applied to t data
(sh <- shapiro.test(X.t)) # Shapiro--Wilk
(ag <- agostino.test(X.t)) # D'Agostino's test
(jb <- jarque.test(X.t)) # Jarque--Bera test
stopifnot(max(sh$p.value, ag$p.value, jb$p.value) < 0.05) # => all rejections based on 5%


### 2.3 Graphical tests of normality ##
## Applied to normal data
pp_plot(X.N, FUN = function(q) pnorm(q, mean = mu.N, sd = sig.N))
qq_plot(X.N, FUN = function(p) qnorm(p, mean = mu.N, sd = sig.N))

## Applied to t data
pp_plot(X.t, FUN = function(q) pnorm(q, mean = mu.t, sd = sig.t))
qq_plot(X.t, FUN = function(p) qnorm(p, mean = mu.t, sd = sig.t))
## => S-shape => Data seems to come from heavier-tailed distribution than
##    the normal distribution.


### 3 Data application ##
## Prepare risk factor data
data(DJ_const) # constituents data
margin <- c("KO", "MSFT", "INTC", "DIS") # margins considered
DJ.const <- DJ_const['1993-01-01/2000-12-31', margin]
X <- -returns(DJ.const) # compute -log-returns; daily risk-factor changes
X. <- list(daily = X, # daily risk-factor changes
           weekly = apply.weekly(X, FUN = colSums), # weekly risk-factor changes
           monthly = apply.monthly(X, FUN = colSums), # monthly risk-factor changes
           quarterly = apply.quarterly(X, FUN = colSums)) # quarterly risk-factor changes

## Are the risk-factor changes normally distributed?
## Formal tests
pvals <- sapply(X., function(x) apply(x, 2, function(data) jarque.test(data)$p.value))
pvals < 0.05
## => Daily and weekly risk-factor changes are not (univariate) normal

## Q-Q plots
time <- c("daily", "weekly", "monthly", "quarterly")
opar <- par(ask = TRUE) # ask after each plot
for(j in seq_len(ncol(X))) { # for all margins, do...
  for(k in seq_along(time)) { # for daily, weekly, monthly and quarterly data, do...
    qq_plot(X.[[k]][,j], FUN = function(p)
      qnorm(p, mean = mean(X.[[k]][,j]), sd = sd(X.[[k]][,j])),
      main = paste("Q-Q plot for margin",margin[j],"based on",time[k],"data"))
    mtext(paste("p-value:", round(pvals[j,k], 4)), side = 4, line = 1, adj = 0)
  }
}
par(opar)


### 4 Simulation ##
## Under H0, p-values are uniformly distributed. Let's check that for N(0,1) data.
set.seed(271)
pvals.H0 <- sapply(1:200, function(b) jarque.test(rnorm(n))$p.value)
qq_plot(pvals.H0, FUN = qunif)

## This does not hold under H1
set.seed(271)
pvals.H1 <- sapply(1:200, function(b) jarque.test(rt(n, df = nu))$p.value)
qq_plot(pvals.H1, FUN = qunif)
all(pvals.H1 == 0) # all p-values 0

#### Multivariate_stylized_facts ####

## Multivariate stylized facts:
## (M1) Return series show little cross-correlation, except for contemporaneous
##      returns;
## (M2) Absolute returns show profound cross-correlation;
## (M3) Correlations between contemporaneous returns vary over time;
## (M4) Extreme returns often coincide with extreme returns in (several) other
##      time series.

### 1 Index data ##

## FTSE and SMI index data
data("FTSE")
data("SMI")

## Build log-returns and merge
FTSE.X <- returns(FTSE)
SMI.X <- returns(SMI)
X <- merge(FTSE = FTSE.X, SMI = SMI.X, all = FALSE)


### 2 (M1) and (M2) ##

## Autocorrelations of returns and absolute returns
acf(X) # raw
acf(abs(X)) # absolute values


### 3 (M3) ##

## Plot cross-correlation (and volatilities)
X.cor  <- apply.monthly(X, FUN = cor)[,2] # cross-correlations estimated based on monthly windows
X.vols <- apply.monthly(X, FUN = function(x) apply(x, 2, sd)) # componentwise volatilities
X.cor.vols <- merge(X.cor, X.vols)
names(X.cor.vols) <- c("Cross-correlation", "Volatility FTSE", "Volatility SMI")
plot.zoo(X.cor.vols, xlab = "Time", main = "Cross-correlation and volatility estimates")

## Volatilities
FTSE.sig <- as.numeric(X.vols[,"FTSE"])
SMI.sig  <- as.numeric(X.vols[,"SMI"])

## Apply Fisher transformation to cross-correlations
fisher <- function(r) log((1 + r)/(1 - r))/2 # Fisher (z-)transformation
rho <- fisher(X.cor)

## Plot Fisher-transformed cross-correlation against volatility with regression lines
## Note: If (X,Y) are jointly normal with correlation rho, fisher(<sample correlation>)
##       is approximately N(log((1+rho)/(1-rho))/2, 1/(n-3)) distributed (n = sample size).

## FTSE
plot(FTSE.sig, rho, xlab = "Estimated volatility", ylab = "Estimated cross-correlation")
reg <- lm(rho ~ FTSE.sig)
summary(reg)
abline(reg, col = "royalblue3")

## SMI
plot(SMI.sig, rho, xlab = "Estimated volatility", ylab = "Estimated cross-correlation")
reg <- lm(rho ~ SMI.sig)
summary(reg)
abline(reg, col = "royalblue3")

## EXERCISE: Now try and compare with these SWN (strict white noise) data
set.seed(271)
X.t <- xts(rmvt(n = nrow(X), df = 5, sigma = cor(X)), time(X))
X.N <- xts(rmvnorm(n = nrow(X),      sigma = cor(X)), time(X))


### 4 (M4) ###

plot.zoo(X, xlab = "Time", main = "Log-returns") # seems like extremes occur together

X. <- apply(X, 2, rank)
plot(X., main = "Componentwise ranks") # now better visible (bottom-left, top-right corners)
## Note: More on that ("pseudo-observations") in Chapter 7
3schwartz/SpecialeScrAndFun documentation built on May 4, 2019, 6:29 a.m.