plot_TEP_proj <- function(D = nrow(x_train), sim_nr = 2, fault_nr = 11) {
x_train <- get_TEP_train(1, return_all = FALSE)
x_train <- lag_extend(x_train, 3)
x_train <- (x_train - rowMeans(x_train)) / rowSds(x_train)
m <- ncol(x_train)
Sigma0_hat <- 1 / (m - 1) * x_train %*% t(x_train)
pca_obj <- tpca::pca(Sigma0_hat)
V <- pca_obj$vectors
lambda <- pca_obj$values
z_train <- V %*% x_train / sqrt(lambda)
x_test <- get_TEP_test(sim_nr, fault_nr, return_all = FALSE)[[1]]
x_test <- lag_extend(x_test, 3)
x_test <- (x_test - rowMeans(x_test)) / rowSds(x_test)
z_test <- V %*% x_test / sqrt(lambda)
kappa <- 160
n_before <- 2 * kappa
n_after <- 160
for (d in D) {
print(d)
z_d <- c(z_train[d, ], z_test[d, ])
par(mfrow = c(2, 1))
plot(z_d[(m + kappa - n_before):(m + kappa + n_after)], type = 'l',
main = paste0('Projection ', d))
abline(v = n_before - kappa, lwd = 2)
abline(v = n_before, lwd = 2, col = 'red')
acf(z_train[d, ])
Sys.sleep(2)
}
}
get_rl_file <- function(lag, fault_nr, change_distr = 'semisparse_uniform') {
dir <- './results/'
n <- 160 - lag
unlagged_dim <- 52
d <- unlagged_dim * (lag + 1)
alpha_str <- '01'
m <- 500 - lag
paste0(dir, 'TEP_rl_fault', fault_nr, '_', change_distr,
'_n', n, 'alpha', alpha_str, 'm', m, 'd', d, 'l', lag, '.txt')
}
get_rl_table <- function(lag, fault_nr, keep_names = NULL,
change_distr = c('semisparse_uniform',
'semisparse_sd_only',
'semisparse_mean_only')) {
n_distr <- length(change_distr)
rl_file <- get_rl_file(lag, fault_nr, change_distr[1])
rl_df <- read.table(rl_file, header = TRUE, sep = ' ')
tpca_ind <- grepl('tpca', names(rl_df))
names(rl_df)[tpca_ind] <- paste0(names(rl_df)[tpca_ind], ' ', change_distr[1])
if (n_distr > 1) {
for (i in 2:n_distr) {
rl_file <- get_rl_file(lag, fault_nr, change_distr[i])
rl_df_temp <- read.table(rl_file, header = TRUE, sep = ' ')
rl_df_temp <- rl_df_temp[, grepl('tpca', names(rl_df_temp))]
names(rl_df_temp) <- paste0(names(rl_df_temp), ' ', change_distr[i])
rl_df <- cbind(rl_df, rl_df_temp)
}
}
dpca_rl <- read.table('./results/TEP_rl_dpca_analytical.txt', sep = ' ', header = TRUE)
rl_df$dpca <- dpca_rl[, fault_nr]
if (!is.null(keep_names)) rl_df <- rl_df[keep_names]
rl_df
}
rl_hist <- function(lag, fault_nr,
change_distr = c('semisparse_uniform',
'semisparse_sd_only',
'semisparse_mean_only')) {
n_distr <- length(change_distr)
rl_df <- get_rl_table(lag, fault_nr, change_distr = change_distr)
par(mfrow = c(2 + n_distr, 3))
for (i in 1:length(rl_df)) {
hist(rl_df[[i]], breaks = 20, probability = TRUE,
xlab = 'run length', main = names(rl_df)[i])
}
}
setup_legend <- function(method_names) {
legend_info <- list('max_pca' = c('Max DPCA', 'darkgoldenrod'),
'min_pca' = c('Min DPCA', 'cornflowerblue'),
'semisparse_uniform' = c('TDPCA(unif)', 'firebrick3'),
'semisparse_mean_only' = c('TDPCA(mean)', 'mediumpurple3'),
'semisparse_sd_only' = c('TDPCA(var)', 'darkgreen'),
'dpca' = c('DPCA', 'black'))
col <- rep('', along = method_names)
label <- rep('', along = method_names)
for (i in seq_along(legend_info)) {
ind <- which(grepl(names(legend_info)[i], method_names))
label[ind] <- legend_info[[i]][1]
col[ind] <- legend_info[[i]][2]
}
return(list('col' = col, 'label' = label))
}
#' Plot density estimates of the run lengths
#'
#' WARNING: Before running this function
#' your must either have run \code{run_TEP_sim} yourself, or have downloaded
#' the results/ and thresholds/ directories from https://github.com/Tveten/tdpcaTEP
#' and put them in your working directory.
#'
#' @param lag The lag used in TDPCA.
#' @param fault_nr A number between 1 and 20 indicating the fault number.
#' @param keep_names A vector of names indicating which methods should be kept for
#' display (for internal use).
#' @param change_distr Which change distributions TDPCA to show results for
#' (see \code{?tpca::tpca}).
#' @param xlim A vector with lower and upper bounds for the x-axis.
#'
#' @export
rl_density <- function(lag, fault_nr, keep_names = NULL,
change_distr = c('semisparse_uniform',
'semisparse_sd_only',
'semisparse_mean_only'),
xlim = c(50, 300)) {
n_distr <- length(change_distr)
rl_df <- get_rl_table(lag, fault_nr, keep_names, change_distr)
kappa <- 160 - lag
lwd_lines <- 1.3
legend_setup <- setup_legend(names(rl_df))
col <- legend_setup$col
label <- legend_setup$label
adj <- 1
dens_ests <- lapply(rl_df, function(rl_vec) density(rl_vec, adjust = adj))
max_y <- max(unlist(lapply(dens_ests, function(dens_est) max(dens_est$y))))
plot(dens_ests[[1]], xlab = 'Run length', col = col[1],
xlim = xlim, ylim = c(0, max_y), lwd = lwd_lines,
main = paste0('Fault ', fault_nr))
for (i in 2:length(rl_df)) {
lines(dens_ests[[i]], col = col[i], lwd = lwd_lines)
}
abline(v = kappa, lwd = 3, lty = 2)
legend('topleft', unique(label), col = unique(col), lty = 1, lwd = lwd_lines)
}
p_false <- function(lag, fault_nr = 11, keep_names = NULL,
change_distr = c('semisparse_uniform',
'semisparse_sd_only',
'semisparse_mean_only')) {
kappa <- 160 - lag
rl_df <- get_rl_table(lag, fault_nr, keep_names, change_distr)
apply(rl_df, 2, function(x) round(sum(x <= kappa) / length(x), 3))
}
est_edd <- function(lag, fault_nr = 11, keep_names = NULL,
change_distr = c('semisparse_uniform',
'semisparse_sd_only',
'semisparse_mean_only')) {
kappa <- 160 - lag
rl_df <- get_rl_table(lag, fault_nr, keep_names, change_distr)
apply(rl_df, 2, function(x) round(mean(x[x > kappa]) - kappa, 1))
}
#' Make a table of EDD results and PFA for all tested methods.
#'
#' WARNING: Before running this function
#' your must either have run \code{run_TEP_sim} yourself, or have downloaded
#' the results/ and thresholds/ directories from https://github.com/Tveten/tdpcaTEP
#' and put them in your working directory.
#'
#' @param lag The lag used in TDPCA.
#' @param fault_nr A number between 1 and 20 indicating the fault number.
#' @param keep_names A vector of names indicating which methods should be kept for
#' display (for internal use).
#' @param change_distr Which change distributions TDPCA to show results for
#' (see \code{?tpca::tpca}).
#' @param sort_by If not NULL, either 'edd' or 'pfa', to sort the output table.
#' @param show_plot Logical. Should a density plot also be shown?
#'
#' @export
TEP_summary <- function(lag, fault_nr, keep_names = NULL,
change_distr = c('semisparse_uniform',
'semisparse_sd_only',
'semisparse_mean_only'),
sort_by = NULL, show_plot = TRUE, ...) {
if (show_plot) rl_density(lag, fault_nr, keep_names, change_distr, ...)
pfa <- p_false(lag, fault_nr, keep_names, change_distr)
edd <- est_edd(lag, fault_nr, keep_names, change_distr)
summary_table <- data.frame('method' = names(edd),
'edd' = edd,
'pfa' = pfa)
row.names(summary_table) <- NULL
if (!is.null(sort_by))
summary_table <- summary_table[order(summary_table[[sort_by]]), ]
summary_table
}
#' Make a table summarizing the EDD results for all faults.
#'
#' This function reproduces the TEP table in the paper.
#' WARNING: Before running this function
#' your must either have run \code{run_TEP_sim} yourself, or have downloaded
#' the results/ and thresholds/ directories from https://github.com/Tveten/tdpcaTEP
#' and put them in your working directory.
#'
#' Prints the PFAs and sorts the columns of the output table in the order of PFA.
#'
#' @export
pfa_edd_table <- function() {
lag <- 5
fault_nr <- 1:20
keep_names <- c('max_pca20', 'min_pca20', 'tpca0.9 semisparse_uniform',
'tpca0.9 semisparse_mean_only', 'tpca0.99 semisparse_sd_only',
'dpca')
labels <- setup_legend(keep_names)$label
edd_table <- as.data.frame(matrix(0, ncol = length(labels),
nrow = length(fault_nr)))
names(edd_table) <- labels
for (i in fault_nr) {
edd_table[i, ] <- est_edd(lag, fault_nr[i], keep_names)
}
pfa <- p_false(lag, 1, keep_names)
print('Probabilities of false alarms:')
print(sort(pfa))
print('EDD table:')
edd_table[order(pfa)]
}
joint_TEP_summary <- function(lag,
change_distr = c('semisparse_uniform',
'semisparse_sd_only',
'semisparse_mean_only'),
sort_by = NULL) {
table4 <- TEP_summary(lag, 4, show_plot = FALSE)
table11 <- TEP_summary(lag, 11, show_plot = FALSE)
joint_table <- cbind(table4, table11[, 2:3])
names(joint_table) <- c('method', 'edd4', 'p_false4', 'edd11', 'p_false11')
joint_table$method <- c('Max PCA (J = 1)', 'Max PCA (J = 5)', 'Max PCA (J = 20)',
'Min PCA (J = 1)', 'Min PCA (J = 5)', 'Min PCA (J = 20)',
'TPCA (uniform, c = 0.8)', 'TPCA (uniform, c = 0.99)', 'TPCA (uniform, c = 0.999)',
'TPCA (var, c = 0.8)', 'TPCA (var, c = 0.99)', 'TPCA (var, c = 0.999)',
'TPCA (mean, c = 0.8)', 'TPCA (mean, c = 0.99)', 'TPCA (mean, c = 0.999)')
rownames(joint_table) <- NULL
if (!is.null(sort_by))
joint_table <- joint_table[order(joint_table[[sort_by]]), ]
xtable::xtable(joint_table, digits = c(0, 0, 1, 3, 1, 3))
}
save_density_plots <- function(lag) {
base_width <- 5
base_height <- 5
n_col <- 2
n_row <- 1
width <- n_col * base_width
height <- n_row * base_height
fault_nr <- c(10, 11)
dir <- './results/figures/'
file_name <- paste0('rl_densities_TEP_fault', paste(fault_nr, collapse = '-'), '.png')
png(file = paste0(dir, file_name), width = width, height = height, units = 'in', res = 480)
par(mfrow = c(1, 2))
xlim <- list(c(60, 250), c(60, 250))
keep_names <- c('max_pca20', 'min_pca20', 'tpca0.9 semisparse_uniform',
'tpca0.9 semisparse_mean_only', 'tpca0.99 semisparse_sd_only',
'dpca')
rl_density(lag, fault_nr = fault_nr[1], keep_names, xlim = xlim[[1]])
rl_density(lag, fault_nr = fault_nr[2], keep_names, xlim = xlim[[2]])
dev.off()
}
get_TEP_thresholds <- function(change_distr) {
get_tpca_train_info <- function(cov_mat_nr, change_distr) {
dir <- './thresholds/axes/'
axes_location <- paste0(dir, 'tpca_axes_', cov_mat_nr, '_m', m, 'd', d, '.txt')
all_lines_vec <- readLines(axes_location)
all_lines_split <- strsplit(all_lines_vec, ' ')
axes_list <- list()
cutoffs <- list()
for (i in seq_along(all_lines_split)) {
line <- all_lines_split[[i]]
if (line[1] == change_distr) {
axes_list[[length(axes_list) + 1]] <- as.numeric(line[3:length(line)])
cutoffs[[length(cutoffs) + 1]] <- as.numeric(line[2])
}
}
cutoffs <- unlist(cutoffs)
names(axes_list) <- as.character(cutoffs)
list('axes' = axes_list, 'cutoffs' = cutoffs)
}
get_tpca_threshold <- function(m, d, cov_mat_nr, axes) {
dir <- './thresholds/'
alpha_str <- strsplit(as.character(alpha), '[.]')[[1]][2]
threshold_file <- paste0(dir, 'tpca_thresholds_', cov_mat_nr,
'_n', n, 'alpha', alpha_str,
'm', m, 'd', d, '_FINAL.txt')
threshold_df <- read.table(threshold_file, header = TRUE, sep = ' ')
threshold_df$axes <- as.character(threshold_df$axes)
axes_in_df <- lapply(strsplit(threshold_df$axes, '-'), as.numeric)
axes_in_df <- lapply(axes_in_df, sort)
axes <- sort(axes)
ind <- which(vapply(axes_in_df, is_equal_vectors, logical(1), y = axes))
threshold <- threshold_df$threshold[ind]
if (length(threshold) == 0)
stop(paste0('Threshold for m = ', m, ', d = ', d, ', axes = (',
paste(axes, collapse = ', '), ') does not exist.'))
if (length(threshold) > 1) {
threshold <- threshold[1]
warning('More than one threshold returned from file. Picking the first one.')
}
threshold
}
m <- 497
d <- 132
alpha <- 0.01
n <- 160
cov_mat_nr <- 2
thresholds_list <- list()
tpca_info <- get_tpca_train_info(cov_mat_nr, change_distr)
tpca_threshold_df <- data.frame('cutoffs' = tpca_info$cutoffs)
tpca_threshold_df$axes <- lapply(tpca_info$axes, function(axis) {
paste(axis, collapse = '-')
})
tpca_threshold_df$threshold <- lapply(tpca_info$axes, function(axis) {
get_tpca_threshold(m, d, cov_mat_nr, axis)
})
thresholds_list$tpca <- tpca_threshold_df
r_pca <- c(1, 2, 3, 5, 10, 20)
axes_max_pca <- lapply(r_pca, function(j) 1:j)
axes_min_pca <- lapply(r_pca, function(j) (d - j + 1):d)
min_pca_threshold_df <- data.frame('J' = r_pca)
max_pca_threshold_df <- data.frame('J' = r_pca)
min_pca_threshold_df$threshold <- lapply(axes_min_pca, function(axis) {
get_tpca_threshold(m, d, cov_mat_nr, axis)
})
max_pca_threshold_df$threshold <- lapply(axes_max_pca, function(axis) {
get_tpca_threshold(m, d, cov_mat_nr, axis)
})
thresholds_list$min_pca <- min_pca_threshold_df
thresholds_list$max_pca <- max_pca_threshold_df
thresholds_list
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.