Nothing
build_data_set <- function(n, seed = 0) {
withr::local_seed(seed)
blink <- stats::binomial()
covariate <- data.frame(y = runif(n))
coeffs <- matrix(c(-1, 2, 1, -2), ncol = 2)
scores <- cbind(1, covariate$y) %*% coeffs
probs <- apply(scores, 1, blink$linkinv)
x <- rep(0, n)
x[1] <- sample(c(0, 1), 1)
for (k in 2:n) {
prob <- probs[x[k - 1] + 1, k - 1]
x[k] <- sample(c(0, 1), 1, prob = c(1 - prob, prob))
}
list(x = x, covariate = covariate)
}
build_data_set_2 <- function(seed = 0) {
withr::local_seed(seed)
y <- as.factor(rep(rep(1:4, each = 50), 2))
z <- runif(length(y))
dts_cov <- data.frame(y = y, z = z)
cov_dep <- matrix(runif(length(levels(y)) * 4, min = -1, max = 1), nrow = length(levels(y)))
cov_dep[cov_dep > 0.5] <- 1
cov_dep[cov_dep < -0.5] <- -1
cov_dep[cov_dep <= 0.5 & cov_dep >= -0.5] <- 0
x <- rep(0, length(y))
x[2] <- sample(c(0, 1), 1)
probs <- rep(NA, length(y))
for (k in 3:length(y)) {
probs[k] <- 1 / (1 + exp(-sum(cov_dep[as.integer(y[k - 1]), ] * c(1, x[k - 1], z[c(k - 1, k - 2)]))))
x[k] <- ifelse(runif(1) <= probs[k], 1, 0)
}
x <- as.factor(x)
list(x = x, covariate = dts_cov)
}
build_data_set_3_model <- function(n, seed = 0, alpha = 0.1) {
withr::local_seed(seed)
x3 <- sample(c("A", "B", "C"), n, replace = TRUE)
y3 <- ifelse(runif(length(x3)) > 0.5, c(x3[-1], sample(c("A", "B", "C"), 1)), c(x3[-c(1, 2)], sample(c("A", "B", "C"), 2, replace = TRUE)))
y3 <- as.factor(ifelse(runif(length(x3)) > 0.2, y3, sample(c("A", "B", "C"), n, replace = TRUE)))
z3 <- data.table::fcase(
x3[-1] == "A", runif(length(x3) - 1),
x3[-1] == "B", 0.5 + runif(length(x3) - 1),
x3[-1] == "C", -0.5 + runif(length(x3) - 1)
)
z3 <- c(0, z3)
df_full <- data.frame(y = y3, z = z3)
model <- covlmc(x3, df_full, max_depth = 5, min_size = 5, alpha = alpha)
list(model = model, dts = x3, cov = df_full)
}
extract_p_value <- function(tree) {
recurse_extract_p_value <- function(tree) {
if (is.null(tree[["children"]])) {
if (is.null(tree[["model"]])) {
# nothing there (should not happen)
NULL
} else {
p_value <- NA
if (!is.null(tree$model[["p_value"]])) {
p_value <- tree$model[["p_value"]]
}
data.frame(p_value = p_value, nb_coeffs = length(tree$model[["coefficients"]]))
}
} else {
df <- NULL
for (v in seq_along(tree[["children"]])) {
sub_p <- recurse_extract_p_value(tree$children[[v]])
if (!is.null(sub_p)) {
df <- rbind(df, sub_p)
}
}
if (!is.null(tree[["merged_model"]])) {
if (!is.na(tree[["merged_model"]]$p_value)) {
sub_p <- data.frame(
p_value = tree[["merged_model"]]$p_value,
nb_coeffs = length(tree[["merged_model"]]$coefficients)
)
df <- rbind(df, sub_p)
}
}
if (!is.null(tree$p_value)) {
sub_p <- data.frame(
p_value = tree$p_value,
nb_coeffs = NA
)
df <- rbind(df, sub_p)
}
if (!is.null(tree$merged_p_value)) {
sub_p <- data.frame(
p_value = tree$merged_p_value,
nb_coeffs = NA
)
df <- rbind(df, sub_p)
}
df
}
}
recurse_extract_p_value(tree)
}
compare_covlmc_node <- function(n1, n2) {
for (what in c("f_by", "match", "prunable", "vals", "depth", "nb_ctx", "cov_names", "alpha", "merged")) {
if (!identical(n1[[what]], n2[[what]])) {
return(FALSE)
}
}
if (!is.null(n1[["model"]])) {
if (is.null(n2[["model"]])) {
return(FALSE)
}
if (!identical(n1[["model"]][["coefficents"]], n2[["model"]][["coefficents"]])) {
return(FALSE)
}
}
if (!is.null(n1[["merged_model"]])) {
if (is.null(n2[["merged_model"]])) {
return(FALSE)
}
if (!identical(n1[["merged_model"]][["coefficents"]], n2[["merged_model"]][["coefficents"]])) {
return(FALSE)
}
}
if (!is.null(n1[["extended_model"]])) {
if (is.null(n2[["extended_model"]])) {
return(FALSE)
}
if (!identical(n1[["extended_model"]][["coefficents"]], n2[["extended_model"]][["coefficents"]])) {
return(FALSE)
}
}
TRUE
}
compare_covlmc <- function(m1, m2) {
rec_compare_covlmc <- function(m1, m2) {
if (!compare_covlmc_node(m1, m2)) {
return(FALSE)
}
if (is.null(m1[["children"]])) {
if (!is.null(m2[["children"]])) {
return(FALSE)
}
return(TRUE)
} else {
if (is.null(m2[["children"]])) {
return(FALSE)
}
if (length(m1[["children"]]) != length(m2[["children"]])) {
return(FALSE)
}
for (v in seq_along(m1[["children"]])) {
if (!rec_compare_covlmc(m1[["children"]][[v]], m2[["children"]][[v]])) {
return(FALSE)
}
}
return(TRUE)
}
}
rec_compare_covlmc(m1, m2)
}
build_degenerate_elec_model <- function(with_new_data = FALSE) {
pc_week_15_16 <- powerconsumption[powerconsumption$week %in% c(15, 16), ]
elec <- pc_week_15_16$active_power
elec_dts <- cut(elec, breaks = c(0, 0.4, 2, 8), labels = c("low", "typical", "high"))
elec_cov <- data.frame(day = (pc_week_15_16$hour >= 7 & pc_week_15_16$hour <= 18))
elec_tune <- tune_covlmc(elec_dts, elec_cov, min_size = 5)
elec_model <- prune(as_covlmc(elec_tune), alpha = 3.961e-10)
result <- list(model = elec_model, dts = elec_dts, cov = elec_cov)
if (with_new_data) {
pc_week_17_18 <- powerconsumption[powerconsumption$week %in% c(17, 18), ]
elec_new_dts <- cut(pc_week_17_18$active_power, breaks = c(0, 0.4, 2, 8), labels = c("low", "typical", "high"))
elec_new_cov <- data.frame(day = (pc_week_17_18$hour >= 7 & pc_week_17_18$hour <= 18))
result$new_cov <- elec_new_cov
result$new_dts <- elec_new_dts
}
result
}
## same calculations as in slow_loglikelihood but for COVLMC
co_slow_loglikelihood <- function(model, newdata, initial = c("truncated", "specific", "extended"),
ignore, newcov, verbose = FALSE) {
initial <- match.arg(initial)
if (missing(ignore)) {
if (initial == "truncated") {
ignore <- depth(model)
} else {
ignore <- 0L
}
} else if (initial == "truncated" && ignore < depth(model)) {
stop("Must ignore at least ", depth(model), " for truncated likelihood")
}
nx <- to_dts(newdata, model$vals)
if (length(model$vals) > 2) {
newdata <- nx$fx
} else {
newdata <- nx$ix
}
x <- nx$ix + 1
ctx <- c()
max_depth <- depth(model)
if (max_depth == 0) {
## fixed model (and no covariate!)
glmdata <- prepare_glm(newcov, model$match, 0, newdata, 0)
result <- glm_likelihood(model$model$model, glmdata$local_mm, glmdata$target)
} else {
result <- 0
discarded <- 0
for (i in seq_along(newdata)) {
subtree <- match_context_co(model, ctx)
the_model <- NULL
to_keep <- (initial != "specific" && i > ignore) ||
(initial == "specific" && i > max(max_depth, ignore))
## do we have a true match or an extended one?
if (i >= max_depth + 1) {
## true match
if (subtree$merged) {
if (verbose) {
cat("merged model\n")
}
the_model <- subtree$tree$merged_model
} else if (!is.null(subtree$tree[["model"]])) {
the_model <- subtree$tree$model
} else {
stop("No match???")
}
} else {
## extended match
if (subtree$merged) {
if (verbose) {
cat("merged model\n")
}
the_model <- subtree$tree$merged_model
} else if (!is.null(subtree$tree[["model"]])) {
if (verbose) {
cat("extended use of context\n")
}
the_model <- subtree$tree$model
} else if (!is.null(subtree$tree[["extended_model"]])) {
if (verbose) {
cat("extended context\n")
}
the_model <- subtree$tree$extended_model
} else {
stop("No extended match???")
}
}
mm <- prepare_covariate(newcov,
i - subtree$depth - 1,
d = the_model$hsize,
from = subtree$depth - the_model$hsize
)
ll <- glm_likelihood(the_model$model, mm, newdata[i])
if (verbose) {
cat(
"[", paste(rev(model$vals[ctx]), collapse = "-"), "] ->",
paste(model$vals[newdata[i]]), ll,
the_model$coefficients,
"\n"
)
}
if (to_keep) {
result <- result + ll
} else {
discarded <- discarded + ll
}
## ctx <- rev(x[(i-max_depth+1):i])
j <- max(i - max_depth + 1, 1)
ctx <- x[i:j]
# print(paste(i,j))
}
result <- as.numeric(result)
}
if (verbose) {
print(as.numeric(discarded))
}
attr(result, "nobs") <- max(0, length(x) - ignore)
if (initial == "truncated") {
attr(result, "df") <- count_parameters(model, FALSE)
} else if (initial == "specific") {
attr(result, "df") <- count_parameters(model, FALSE) + max_depth
} else {
attr(result, "df") <- count_parameters(model, TRUE)
}
result
}
create_demo_covlmc <- function() {
withr::local_seed(0)
x3 <- sample(c("A", "B", "C"), 1000, replace = TRUE)
y3 <- ifelse(runif(length(x3)) > 0.5, c(x3[-1], sample(c("A", "B", "C"), 1)), c(x3[-c(1, 2)], sample(c("A", "B", "C"), 2, replace = TRUE)))
y3 <- as.factor(ifelse(runif(length(x3)) > 0.2, y3, sample(c("A", "B", "C"), 1000, replace = TRUE)))
z3 <- data.table::fcase(
x3[-1] == "A", runif(length(x3) - 1),
x3[-1] == "B", 0.5 + runif(length(x3) - 1),
x3[-1] == "C", -0.5 + runif(length(x3) - 1)
)
z3 <- c(0, z3)
df_y3 <- data.frame(y = y3)
x3_covlmc <- covlmc(x3, df_y3, max_depth = 5, min_size = 5, alpha = 0.01)
m_cuts <- cutoff(x3_covlmc)
m_current <- x3_covlmc
for (k in seq_along(m_cuts[1:4])) {
m_current <- prune(m_current, m_cuts[k])
}
list(model = m_current, dts = x3, cov = df_y3, full_model = x3_covlmc)
}
create_merged_dataset <- function(n = 1000, seed = 0) {
withr::local_seed(seed)
y <- rnorm(n)
z <- sample(c("K", "L", "M"), n, replace = TRUE, prob = sample(1:4, 3, replace = TRUE))
cov <- data.frame(y = y, z = z)
mcov <- model.matrix(~ y + z, cov)
alpha_1 <- matrix(rnorm(2 * ncol(mcov)), ncol = 2)
alpha_2 <- matrix(rnorm(2 * ncol(mcov)), ncol = 2)
prob_1 <- t(apply(mcov %*% alpha_1, 1, \(x) c(1, exp(x)) / sum(exp(x) + 1)))
prob_2 <- t(apply(mcov %*% alpha_2, 1, \(x) c(1, exp(x)) / sum(exp(x) + 1)))
res <- rep(sample(1:3, 1), n)
for (k in 2:n) {
if (res[k - 1] == 1) {
res[k] <- sample(1:3, 1, prob = prob_1[k - 1, ])
} else {
res[k] <- sample(1:3, 1, prob = prob_2[k - 1, ])
}
}
list(dts = x, cov = cov)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.