rm(list=ls())
library(devtools)
load_all()
# Configuration
seeds <- 1:500
not_seeds <- c(1, 3, 4, 6, 11, 23, 25, 26, 33, 35, 36, 37, 40, 41, 42, 45, 52, 54, 57, 61, 62, 63, 64, 65, 66, 68, 70, 72, 80, 81, 84, 87, 92, 95, 101, 107, 109, 110, 111, 114, 120, 121, 122, 127, 130, 133, 136, 149, 152, 155, 156, 158, 160, 168, 174, 180, 182, 193, 195, 196, 210, 211, 212, 217, 221, 223, 225, 233, 234, 242, 247, 252, 253, 257, 258, 259, 277, 278, 282, 285, 288, 293, 302, 313, 315, 316, 317, 319, 320, 334, 337, 338, 339, 340, 343, 347, 354, 355, 356, 357, 358, 361, 362, 365, 372, 381, 384, 389, 392, 393, 411, 412, 415, 421, 423, 428, 430, 433, 435, 439, 440, 442, 444, 445, 447, 456, 462, 466, 468, 472, 477, 479, 483, 485, 488, 493)
seeds <- seeds[-not_seeds]
B <- length(seeds)
base_input_directory <- "../dataset/scenario-correlated/"
algorithm <- 'non-cyclic-intercept'
scenario <- 'correlated'
input_directory <- paste0(base_input_directory, algorithm, '/')
output_directory <- paste0(base_input_directory, algorithm, '-results/')
informative_p <- 35
p <- 10000
non_informative_p <- p - informative_p
informative_d <- 10
d <- 25
non_informative_d <- d - informative_d
### READING RESULTS
# initialize
initial_seed <- seeds[1]
filename <- make_filename(input_directory, "beta", initial_seed)
beta <- read.csv(filename)
total_beta <- beta$x
filename <- make_filename(input_directory, "gamma", initial_seed)
gamma <- read.csv(filename)
total_gamma <- gamma$x
filename <- make_filename(input_directory, "summary", initial_seed)
summary <- read.csv(filename)
total_summary <- summary
# run on datasets
for (seed in seeds[-1]) {
result <- tryCatch({
filename <- make_filename(input_directory, "beta", seed)
beta <- read.csv(filename)
filename <- make_filename(input_directory, "gamma", seed)
gamma <- read.csv(filename)
filename <- make_filename(input_directory, "summary", seed)
summary <- read.csv(filename)
total_beta <- cbind(total_beta, beta$x)
total_gamma <- cbind(total_gamma, gamma$x)
total_summary <- rbind(total_summary, summary)
}, warning = function(w) {}, error = function(e) {
cat(seed, '\n')
}, finally = { })
}
total_beta <- total_beta[-1, ]
total_gamma <- total_gamma[-1, ]
### PROCESSING RESULTS ####
## BETA
summarize_variable_selection <- function(all_parameter_results, p, informative_p_vector) {
informative_p <- length(informative_p_vector)
non_informative_p <- p - informative_p
result <- t(apply(all_parameter_results, 2, function(column) {
non_null <- column != 0
chosen_parameters <- sum(non_null)
informative_chosen <- sum(non_null[informative_p_vector])
non_informative_chosen <- sum(non_null[-informative_p_vector])
c(chosen_parameters, informative_chosen, non_informative_chosen)
}))
P <- informative_p
N <- non_informative_p
result <- data.frame(TP=result[, 2], FP=result[, 3])
result$FN <- informative_p - result$TP
result$TN <- non_informative_p - result$FP
result$sensitivity <- result$TP/P # TPR
result$specificity <- result$TN/N # TNR
result$accuracy <- (result$TN+result$TP)/(N+P) # TNR
result$FPR <- result$FP/N # false positive rate
result$FNR <- result$FN/P # false negative rate
result$FDR <- result$FP/(result$FP+result$TP) # false discovery rate
result$FOR <- result$FN/(result$FN+result$TN) # false omission rate
result$NPV <- result$TN/(result$FN+result$TN) # false omission rate
nan_indices <- which(is.nan(result$FDR))
result <- result[-nan_indices,]
result_mean <- as.data.frame(colMeans(result))
result_median <- as.data.frame(apply(result, 2, median))
result_sd <- as.data.frame(apply(result, 2, sd))
result_summary <- as.data.frame(cbind(result_mean, result_median, result_sd))
colnames(result_summary) <- c("mean", "median", "sd")
result_summary$rownames <- c("TP", "FP", "FN", "TN", "sensitivity", "specificity", "accuracy", "FPR", "FNR", "FDR", "FOR", "NPV")
return(list(summary=result_summary, whole=result))
}
informative_p_vector <- c(2, 3, 4, 12, 13, 14, 15, 22, 23, 24, 32, 33, 34, 35, 42, 43, 44, 52,
53, 54, 55, 62, 63, 64, 72, 73, 74, 75, 82, 83, 84, 92, 93, 94, 95) - 1 # to adjust for removal of the intercept
result_beta_summary <- summarize_variable_selection(total_beta, p, informative_p_vector)
result_beta_whole <- result_beta_summary$whole
result_beta_summary <- result_beta_summary$summary
filename <- paste(algorithm, scenario, 'beta', sep='_')
full_filename <- paste(output_directory, filename, '.csv', sep='')
write.csv(result_beta_summary[, -4], full_filename, row.names=result_beta_summary$rownames)
filename <- paste(algorithm, scenario, 'beta_whole', sep='_')
full_filename <- paste(output_directory, filename, '.csv', sep='')
write.csv(result_beta_whole, full_filename)
# write pdf
# filename <- "beta_variable_selection_boxplot.pdf"
# full_filename <- paste0(output_directory, filename)
# pdf(full_filename, width=12, height=6)
# par(oma=c(0,1,0,0))
# boxplot(as.numeric(result_beta_whole$FDR), as.numeric(result_beta_whole$sensitivity),
# as.numeric(result_beta_whole$specificity), horizontal=TRUE, axes=FALSE,
# main="Variable selection metrics for beta (correlated scenario, with intercept)")
# axis(1)
# axis(2, labels=c("FDR", "Sensitivity", "Specificity"), at=1:3, las=2)
# box()
# par(oma=c(0,0,0,0))
# dev.off()
# GAMMA
informative_d_vector <- c(2, 4, 6, 8, 10, 12, 15, 18, 21, 24) - 1 # to adjust for the intercept
result_gamma_summary <- summarize_variable_selection(total_gamma, d, informative_d_vector)
result_gamma_whole <- result_gamma_summary$whole
result_gamma_summary <- result_gamma_summary$summary
filename <- paste(algorithm, scenario, 'gamma', sep='_')
full_filename <- paste(output_directory, filename, '.csv', sep='')
write.csv(result_gamma_summary[, -4], full_filename, row.names=result_gamma_summary$rownames)
filename <- paste(algorithm, scenario, 'gamma_whole', sep='_')
full_filename <- paste(output_directory, filename, '.csv', sep='')
write.csv(result_gamma_whole, full_filename)
#
#
# filename <- "gamma_variable_selection_boxplot.pdf"
# full_filename <- paste0(output_directory, filename)
# pdf(full_filename, width=12, height=6)
# par(oma=c(0,1,0,0))
# boxplot(as.numeric(result_gamma_whole$FDR), as.numeric(result_gamma_whole$sensitivity),
# as.numeric(result_gamma_whole$specificity), horizontal=TRUE, axes=FALSE,
# main="Variable selection metrics for gamma (correlated scenario, with intercept)")
# axis(1)
# axis(2, labels=c("FDR", "Sensitivity", "Specificity"), at=1:3, las=2)
# box()
# par(oma=c(0,1,0,0))
# dev.off()
## ALL (results from each run)
summary_means <- colMeans(total_summary)
summary_sd <- apply(total_summary, 2, sd)
summary_min <- apply(total_summary, 2, min)
summary_max <- apply(total_summary, 2, max)
summary_median <- apply(total_summary, 2, median)
summary <- cbind(summary_means, summary_median, summary_sd, summary_min, summary_max)
colnames(summary) <- c("mean", "median", "sd", "min", "max")
summary <- as.data.frame(summary)
plot_file <- function(seed) {
seed_string <- formatC(seed, width=4, flag="0")
filename <- paste0('../dataset/scenario-correlated/non-cyclic-no-intercept/', seed_string, '_cv_loglik.csv')
df <- read.csv(filename)
plot(rowMeans(df), typ='l', ylim=c(min(apply(df, 2, min)), max(apply(df, 2, max))))
for (k in 1:5) { lines(df[, k], lty=3) }
return(rowMeans(df))
}
lol <- FALSE
if (lol) {
base_input_directory <- "../dataset/scenario-correlated/"
algorithm_no <- 'non-cyclic-no-intercept'
scenario <- 'correlated'
input_directory_no <- paste0(base_input_directory, algorithm_no, '/')
algorithm_yes <- 'non-cyclic-intercept'
input_directory_yes <- paste0(base_input_directory, algorithm_yes, '/')
errs_loglik_no <- c()
errs_dev_no <- c()
errs_loglik_yes <- c()
errs_dev_yes <- c()
for (seed in seeds) {
seed_string <- formatC(seed, width=4, flag="0")
# no intercept
filename <- paste0(input_directory_no, seed_string, '_cv_loglik.csv')
df <- read.csv(filename)
errs_loglik_no <- cbind(rowMeans(df), errs_loglik_no)
filename <- paste0(input_directory_no, seed_string, '_cv_deviance.csv')
df <- read.csv(filename)
errs_dev_no <- cbind(rowMeans(df), errs_dev_no)
# intercept
filename <- paste0(input_directory_yes, seed_string, '_cv_loglik.csv')
df <- read.csv(filename)
errs_loglik_yes <- cbind(rowMeans(df), errs_loglik_yes)
filename <- paste0(input_directory_yes, seed_string, '_cv_deviance.csv')
df <- read.csv(filename)
errs_dev_yes <- cbind(rowMeans(df), errs_dev_yes)
}
#pdf(paste0(paste(scenario, "cv", "loglik", sep='_'), '.pdf'), width=12, height=6)
errs_loglik_yes_sd <- apply(errs_loglik_yes, 1, sd)
errs_loglik_yes <- rowMeans(errs_loglik_yes)
errs_loglik_no_sd <- apply(errs_loglik_no, 1, sd)
errs_loglik_no <- rowMeans(errs_loglik_no)
errs_dev_yes_sd <- apply(errs_dev_yes, 1, sd)
errs_dev_yes <- rowMeans(errs_dev_yes)
errs_dev_no_sd <- apply(errs_dev_no, 1, sd)
errs_dev_no <- rowMeans(errs_dev_no)
ylim_loglik <- c(min(min(errs_loglik_yes), min(errs_loglik_no)), max(max(errs_loglik_yes), max(errs_loglik_no)))
ylim_loglik <- c(1100, 1300)
plot(errs_loglik_yes, typ='l', ylab="Log-likelihood", xlab="Iteration", ylim=ylim_loglik, xlim=c(2, 120))
lines(errs_loglik_no, typ='l', col='red')
#dev.off()
#pdf(paste0(paste(scenario, "cv", "deviance", sep='_'), '.pdf'), width=12, height=6)
ylim_dev <- c(min(min(errs_dev_yes), min(errs_dev_no)), max(max(errs_dev_yes), max(errs_dev_no)))
ylim_dev <- c(-200, 200)
plot(errs_dev_yes, typ='l', ylab="Deviance", xlab="Iteration", ylim=ylim_dev, xlim=c(2, 120))
lines(errs_dev_yes - 1.96*errs_dev_yes_sd)
lines(errs_dev_yes + 1.96*errs_dev_yes_sd)
lines(errs_dev_no, typ='l', col='red')
lines(errs_dev_no - 1.96*errs_dev_no_sd, col='red')
lines(errs_dev_no + 1.96*errs_dev_no_sd, col='red')
#dev.off()
}
filename <- paste(algorithm, scenario, 'summary', sep='_')
full_filename <- paste(output_directory, filename, '.csv', sep='')
write.csv(summary, full_filename)
filename <- paste(algorithm, scenario, 'total_summary', sep='_')
full_filename <- paste(output_directory, filename, '.csv', sep='')
write.csv(total_summary, full_filename)
loglik_ylim <- c(min(min(total_summary$null_loglik), min(total_summary$loglik)), max(max(total_summary$null_loglik), max(total_summary$loglik)))
# Plot log likelihood and null likelihoods
plot(total_summary$loglik, ylim=loglik_ylim)
points(total_summary$null_loglik, pch='+', col='red')
total_summary_no <- read.csv('../dataset/scenario-correlated/non-cyclic-no-intercept-results/non-cyclic-no-intercept_correlated_total_summary.csv')
total_summary_yes <- read.csv('../dataset/scenario-correlated/non-cyclic-intercept-results/non-cyclic-intercept_correlated_total_summary.csv')
# # Plot deviance
# ylim_deviance <- c(min(total_summary$deviance), max(total_summary$deviance))
# filename <- paste0(paste(scenario, algorithm, "cv", "deviance", sep='_'), '.pdf')
# filename <- paste0('../../text/figures/', filename)
# pdf(filename, width=12, height=6)
# plot(total_summary$deviance, ylim=ylim_deviance, xlab="Seed", ylab="Deviance")
# hist(total_summary_yes$deviance, ylim=c(0, 40), col=rgb(red=1,green=0,blue=0, alpha=0.2), breaks=seq(-100, 250, 10), xlab="Deviance", main="Deviances on test set")
# hist(total_summary_no$deviance, ylim=c(0, 40), col=rgb(red=0,green=0,blue=1, alpha=0.2), breaks=seq(-100, 250, 10), xlab="Deviance", main="Deviances on test set", add=T)
# abline(v=0, col='red', lwd=3)
# dev.off()
# deviance_no_intercept <- -total_summary_no$deviance
# deviance_intercept <- -total_summary_yes$deviance
# full_filename <- paste0(base_input_directory, 'correlated_deviance_both_boxplot_no_title.pdf')
# pdf(full_filename, width=12, height=6)
# boxplot(deviance_intercept, deviance_no_intercept, xlab='Difference in deviance', horizontal=TRUE)
# axis(2, labels=c("With", "Without"), at=1:2, las=2)
# abline(v=0, lty=3)
# dev.off()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.