Nothing
###################
## Preliminaries ##
###################
## packages
library("coin")
library("strucchange")
library("lattice")
## random seed
rseed <- 20061103
## theme for lattice/trellis graphics
trellis.par.set(theme = canonical.theme(color = FALSE))
######################
## Boston homicides ##
######################
## time series plot
data("BostonHomicide", package = "strucchange")
hom_month <- zoo(as.vector(BostonHomicide$homicides), as.yearmon(as.vector(time(BostonHomicide$homicides))))
hom_year <- aggregate(hom_month, function(x) as.numeric(floor(x)), mean)
plot(hom_month, col = grey(0.7), lwd = 2, ylab = "Number of homicides", xlab = "Time")
lines(hom_year, type = "b")
## monthly data
hom_month <- data.frame(
log_homicides = log(coredata(hom_month) + 0.5),
time = as.numeric(time(hom_month)))
## asymptotic unconditional test
sctest(gefp(log_homicides ~ 1, data = hom_month), functional = supLM(0.1))
## asymptotic conditional test
set.seed(rseed)
maxstat_test(log_homicides ~ time, data = hom_month, minprob = 0.1)
## approximate conditional test
set.seed(rseed)
maxstat_test(log_homicides ~ time, data = hom_month, minprob = 0.1, distribution = approximate(9999))
## annual data
hom_year <- data.frame(
homicides = coredata(hom_year),
time = time(hom_year))
## asymptotic unconditional test
sctest(gefp(homicides ~ 1, data = hom_year), functional = supLM(0.1))
## asymptotic conditional test
set.seed(rseed)
maxstat_test(homicides ~ time, data = hom_year, minprob = 0.1)
## approximate conditional test
set.seed(rseed)
maxstat_test(homicides ~ time, data = hom_year, minprob = 0.1, distribution = approximate(9999))
## note that the manuscript computes the exact conditional p-value
## by exhaustive search (rather than approximating via 10,000 simulations)
###########################
## Hiring discrimination ##
###########################
## data
hire <- cbind(c(2, 0, 0, 0, 5, 14), c(427, 86, 104, 180, 111, 59))
hire <- data.frame(
resp = factor(unlist(sapply(1:nrow(hire), function(i) rep(c("female", "male"), hire[i,])))),
time = as.numeric(rep(1991:1996, rowSums(hire))))
## visualization
set.seed(rseed)
maxstat_test(resp ~ time, data = hire, distribution = approximate(9999), minprob = 0.05)
################
## CO2 reflux ##
################
## data
data("CWD", package = "coin")
lwood <- reshape(CWD, varying = paste("sample", c(2:4,6:8), sep = ""),
timevar = "tree", direction = "long", v.names = "sample")
lwood$tree <- factor(lwood$tree)
## visualization
print(xyplot(sample ~ time | tree, data = lwood, type = "b",
scales = list(y = list(relation = "free")),
layout = c(3, 2), xlab = "Time", ylab = expression(paste(CO[2], " reflux")),
as.table = TRUE))
## test
(cwd_mt <- maxstat_test(sample2 + sample3 + sample4 + sample6 + sample7 + sample8 ~ trend,
data = CWD, distribution = approximate(1e5)))
## maximally selected statistics with 5% critical value
cwd_st <- statistic(cwd_mt, "standardized")
cwd_st <- data.frame(cwd_st, time = CWD$time[1] + CWD$trend[2:11])
cwd_st <- reshape(cwd_st, varying = paste("sample", c(2:4,6:8), sep = ""),
timevar = "tree", direction = "long", v.names = "sample")
cwd_st$tree <- factor(cwd_st$tree)
cwd_q <- qperm(cwd_mt, 0.95)
print(xyplot(sample ~ time | tree, data = cwd_st, type = "b",
panel = function(...) {
panel.xyplot(...)
panel.abline(h = c(-cwd_q, cwd_q), col = 2)
panel.abline(h = 0, col = "gray")
},
layout = c(3, 2), xlab = "Time", ylab = "Test statistics", ylim = c(-3.5, 3.5),
as.table = TRUE))
##################################
## Dow Jones Industrial Average ##
##################################
## data
data("DJIA", package = "strucchange")
djia <- diff(log(DJIA)) * 100
djia_res <- coredata(djia - mean(djia))
djia_trafo <- cbind(Intercept = djia_res, Variance = djia_res^2 - mean(djia_res^2))
djia_time <- time(djia)
## visualization
plot(djia, xlab = "Time", ylab = "Dow Jones stock returns")
## test
set.seed(rseed)
djia_mt <- maxstat_test(djia_trafo ~ as.numeric(djia_time), distribution = approximate(9999), minprob = 0.1)
## maximally selected statistics
apply(abs(statistic(djia_mt, "standardized")), 2, max)
## critical values
qperm(djia_mt, 0.95)
## breakpoint
djia_point <- djia_time[floor(length(djia) * 0.1) + which.max(abs(statistic(djia_mt, "standardized")[,2]))]
## autocorrelation
djia_pre <- window(djia, end = djia_point)
djia_post <- window(djia, start = djia_point + 1)
ar1 <- function(x, digits = 3) {
x <- coredata(x)
x <- x - mean(x)
c(acf(x, plot = FALSE)$acf[2], acf(x^2, plot = FALSE)$acf[2])
}
ar1(djia_pre)
ar1(djia_post)
#######################
## Economic journals ##
#######################
## data
data("Journals", package = "AER")
Journals$age <- 2000 - Journals$foundingyear
Journals <- Journals[order(Journals$age),]
## model in root node
jour_lm <- lm(log(subs) ~ log(price/citations), data = Journals)
## test in root node
set.seed(rseed)
(jour_mt <- maxstat_test(estfun(jour_lm) ~ age, data = Journals, distribution = approximate(9999), minprob = 0.1))
## test information
jour_critval <- qperm(jour_mt, 0.95^(1/4))
jour_process <- statistic(jour_mt, "standardized")
jour_process <- zoo(jour_process, as.numeric(sapply(strsplit(rownames(jour_process), "x <= "), tail, 1)))
colnames(jour_process) <- c("Intercept", "Slope")
jour_point <- time(jour_process)[apply(coredata(abs(jour_process)), 2, which.max)]
## visualization of test in root node
mypanel <- function(x, y, subscripts, groups, panel = panel.xyplot,
col = 1, type = "p", pch = 20, lty = 1, lwd = 1, ...)
{
col <- rep(as.list(col), length = nlevels(groups))
type <- rep(as.list(type), length = nlevels(groups))
pch <- rep(as.list(pch), length = nlevels(groups))
lty <- rep(as.list(lty), length = nlevels(groups))
lwd <- rep(as.list(lwd), length = nlevels(groups))
for(g in 1:nlevels(groups)) {
idx <- g == groups[subscripts]
if (any(idx)) panel(x[idx], y[idx], ...,
col = col[[g]], type = type[[g]], pch = pch[[g]],
lty = lty[[g]], lwd = lwd[[g]])
grid::grid.lines(y = grid::unit(0, "native"), gp = grid::gpar(col = "gray"))
grid::grid.lines(y = grid::unit(jour_critval, "native"), gp = grid::gpar(col = 2))
}
}
print(xyplot(abs(jour_process), panel = mypanel,
xlab = "Time", type = "b", ylim = c(0, 6), as.table = TRUE))
### fit node with h(Y) = score and g(X) = maxstat_trafo
ytrf <- function(data) {
ret <- estfun(lm(data[[1]] ~ data[[2]]))
attr(ret, "assign") <- 1:2
ret
}
xtrf <- function(data) trafo(data, numeric_trafo = maxstat_trafo)
mynode <- function(data) {
vars <- c("society", "pages", "charpp", "age")
sapply(vars, function(v) {
f <- as.formula(paste("log(subs) + log(price/citations) ~", v))
it <- independence_test(f, data = data,
ytrafo = ytrf, xtrafo = xtrf, distribution = approximate(9999))
c(statistic(it), 1 - (1 - pvalue(it))^length(vars))
})
}
## fit all tree elements
jour_node <- factor(Journals$age <= 18, levels = c(TRUE, FALSE), labels = c("Node 2", "Node 3"))
jour_lm2 <- lm(log(subs) ~ log(price/citations), data = Journals[jour_node == "Node 2",])
jour_lm3 <- lm(log(subs) ~ log(price/citations), data = Journals[jour_node == "Node 3",])
## conduct tests in all leaves
set.seed(rseed)
jour_tree <- list(
mynode(Journals),
mynode(Journals[jour_node == "Node 2",]),
mynode(data = Journals[jour_node == "Node 3",])
)
## fitted models
plot(log(subs) ~ log(price/citations), data = Journals,
xlab = "log(price/citations)", ylab = "log(subscriptions)",
pch = c(24, 21)[jour_node], bg = hcl(c(0, 240), 50, 70)[jour_node])
abline(coef(jour_lm2), col = hcl( 0, 80, 30), lty = 5, lwd = 1.7)
abline(coef(jour_lm3), col = hcl(240, 80, 30), lty = 1, lwd = 1.7)
legend("bottomleft", c(expression(age > 18), expression(age <= 18)),
pch = c(19, 17), lty = c(1, 5), col = hcl(c(240, 0), 80, 30), bty = "n")
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.