knitr::opts_chunk$set(echo = TRUE)
library(algebraic.mle) library(dplyr) library(ggplot2) library(reshape2) library(knitr) library(md.tools) library(md.series.system) library(utils) library(tidyverse) library(gridExtra)
# this is the actual code i ran to generate the data set for # `exp_experiment_5.csv`. this was done before i generalized the approach. # i'm keeping all of the code the same since this is actually what was run # to generate the data set, but it should be the same as what i posted # below in block `run-exp-experiment-5` generate_data <- function(n, theta, tau, p) { m <- length(theta) comp_times <- matrix(nrow=n,ncol=m) for (j in 1:m) comp_times[,j] <- rexp(n,theta[j]) comp_times <- md_encode_matrix(comp_times,"t") comp_times %>% md_series_lifetime_right_censoring(tau) %>% md_bernoulli_cand_C1_C2_C3(p) %>% md_cand_sampler() } custom_solver <- function(data, theta, extra_info = NULL, annealing = TRUE) { ll <- md_loglike_exp_series_C1_C2_C3(data) ll.grad <- md_score_exp_series_C1_C2_C3(data) fish <- md_fim_exp_series_C1_C2_C3(data) ll.hess <- function(x) -fish(x) theta.hat <- NULL start <- NULL m <- length(theta) tryCatch({ start <- list(par = theta) if (annealing) { start <- sim_anneal(par = theta, fn = ll, control = list(fnscale = -1, maxit = 10000L, trace = FALSE, t_init = 1, 1e-2, alpha = 0.9, it_per_temp = 10L, sup = function(x) all(x > 0))) } res_optim <- optim( par = start$par, fn = ll, gr = ll.grad, method = "L-BFGS-B", lower = 1e-30, hessian = FALSE, control = list(fnscale = -1, maxit = 1000L)) theta.hat <- mle_numerical(res_optim, options = list(hessian = ll.hess(res_optim$par))) }, error = function(e) { cat("Sample size", nrow(data), " | Anneal:", start$par, " | ", e$message) if (!is.null(extra_info)) { cat(" | ", extra_info) } cat("\n") }) theta.hat } exp_experiment_5_gen <- function( csv_filename, R = 999, bernoulli_probs = c(.2, .3, .4, .5), quants = c(.1, .2, .3, .4, .5), sample_size = c(25, 50, 100, 200, 400), append = TRUE, use_aneal_start = TRUE) { set.seed(32861) # set seed for reproducibility theta <- c(1, # component 1 failure rate 1.1, # 2 0.98, # 3 1.12, # 4 1.05) # 5 m <- length(theta) if (!append) { cnames <- c("R", "p", "tau", "q", "N", paste0("bias",1:m), "mse", paste0("se",1:m), paste0("se_asym",1:m), "mse_asym", "mse_asym_hat", paste0("coverage",1:m)) # Write column names first write.table(t(cnames), file = csv_filename, sep = ",", col.names = FALSE, row.names = FALSE, append = FALSE, quote = FALSE) } for (i in 1:length(bernoulli_probs)) { p <- bernoulli_probs[i] cat("Starting simulations for Bernoulli probability", p, "\n") for (j in 1:length(sample_size)) { N <- sample_size[j] cat("Starting simulations for sample size", N, "\n") for (k in 1:length(quants)) { q <- quants[k] cat("Starting simulations for quantile", q, "\n") tau <- -(1/sum(theta))*log(q) mles <- matrix(nrow = R, ncol = m) CI_lwr <- matrix(nrow = R, ncol = m) CI_upr <- matrix(nrow = R, ncol = m) j <- 1L repeat { data <- generate_data(N, theta, tau, p) theta.hat <- custom_solver( data = data, theta = theta, extra_info = paste0("Replicate(", j, ")"), annealing = use_aneal_start) if (is.null(theta.hat)) { next } if (j %% 10 == 0) { cat("Sample size", N, " | Replicate ", j, " | MLE ", point(theta.hat), "\n") } mles[j, ] <- point(theta.hat) CI <- confint(theta.hat) if (any(is.nan(CI))) { print("NaN in CI") print(summary(theta.hat)) } CI_lwr[j, ] <- CI[,1] CI_upr[j, ] <- CI[,2] j <- j + 1L if (j > R) { break } } # compute asymptotics theta.mle <- NULL while (is.null(theta.mle)) { data <- generate_data(N, theta, tau, p) theta.mle <- custom_solver( data = data, theta = theta, extra_info = "asymptotics", annealing = use_aneal_start) } SE.asym <- se(theta.mle) MSE.asym <- mse(theta.hat, theta) MSE.asym.hat <- mse(theta.mle) # Calculate bias, MSE, and SE for each parameter bias <- colMeans(mles) - theta MSE <- sum(colMeans((mles - theta)^2)) sigma <- cov(mles) * ((sample_size - 1) / sample_size) SE <- sqrt(diag(sigma)) # Compute coverage probabilities coverage <- colMeans((CI_lwr <= theta) & (theta <= CI_upr)) datum <- c(R, p, tau, q, N, bias, MSE, SE, SE.asym, MSE.asym, MSE.asym.hat, coverage) write.table(t(datum), file = csv_filename, sep = ",", col.names = FALSE, row.names = FALSE, append = TRUE) } } } } exp_experiment_5_gen( append = FALSE, csv_filename = "exp_experiment_5.csv", use_aneal_start = TRUE)
The purpose of this data set is to analyze the sensivity of the exponential series system (5 components, all roughly the same failure rate) with respect to a change in:
- Bernoulli probabilities (bernoulli_probs) - Sample sizes (sample_sizes) - Right censoring times (quants)
Here is how we generated this data set (we do not evaluate this code, since we already generated the data set and saved it to a file):
source("exp_experiment_gen.R") exp_experiment_gen( R = 999, bernoulli_probs = c(.2, .3, .4, .5), quants = c(.1, .2, .3, .4, .5), sample_size = c(25, 50, 100, 200, 400), append = FALSE, seed = 32861, use_aneal_start = TRUE, theta = c(1, 1.1, 0.98, 1.12, 1.05), csv_filename = "exp_experiment_5.csv")
We read the data set from the file with:
df <- read.csv("exp_experiment_5.csv") kable(df[1:5, 1:10]) kable(df[1:5, 11:20]) kable(df[1:5, 21:ncol(df)])
Let's visualize the coverage probabilities:
###################### coverage probability ################## df_long <- df %>% pivot_longer( cols = starts_with("coverage"), names_to = "Coverage", values_to = "Value") # Convert the "Coverage" column to a factor for better plotting df_long$Coverage <- as.factor(df_long$Coverage) # Plot the coverage probabilities, with a solid line for the # 95% confidence level ggplot(df_long, aes(x = p, y = Value, color = Coverage)) + geom_point() + geom_line() + geom_hline(yintercept = .95, linetype = "dashed") + labs(x = "Bernoulli Probability (p)", y = "Coverage Probability", title = "Coverage Probability vs Bernoulli Probability", color = "Coverage")
Analysis here
df_long <- df %>% pivot_longer( cols = starts_with("bias"), names_to = "Bias", values_to = "Value") # Convert the "Bias" column to a factor for better plotting df_long$Bias <- as.factor(df_long$Bias) # Plot, ggplot(df_long, aes(x = p, y = Value, color = Bias)) + geom_point() + geom_line() + labs(x = "Bernoulli Probability (p)", y = "Bias", title = "Bias vs Bernoulli Pobability", color = "Bias")
Analysis here
ggplot(df) + geom_point(aes(x = p, y = mse)) + geom_line(aes(x = p, y = mse, color = "Simulation")) + geom_line(aes(x = p, y = mse_asym, color = "Asymptotic"), linetype="dashed") + labs(x = "Bernoulli Probability (p)", y = "Mean Squared Error", title = "Mean Squared Error vs Bernoulli Probability")
Analysis here
df_long <- df %>% pivot_longer( # regex to match column names "se<digit>" cols = matches("se[0-9]"), names_to = "SE", values_to = "Value") # Convert the "Coverage" column to a factor for better plotting df_long$SE <- as.factor(df_long$SE) # Plot ggplot(df_long, aes(x = p, y = Value, color = SE)) + geom_point() + geom_line() + labs(x = "Bernoulli Probability (p)", y = "SE", title = "SE vs Bernoulli Probability", color = "SE")
Analysis here
df_long <- df %>% pivot_longer( # regex to match column names "se<digit>" cols = starts_with("se_asym"), names_to = "SE", values_to = "Value") # Convert the "Coverage" column to a factor for better plotting df_long$SE <- as.factor(df_long$SE) # Plot ggplot(df_long, aes(x = p, y = Value, color = SE)) + geom_point() + geom_line() + labs(x = "Bernoulli Probability (p)", y = "SE", title = "SE vs Bernoulli Probability", color = "SE")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.