# analyse results
library(distbayesianmc)
library(gridExtra)
library(grid)
setwd("~/R_programming/distbayesianmc")
#source("~/R_programming/distbayesianmc/params_simulation/params_logit.R")
#setwd("./sim_results/logistic/") # use this for the pima dataset
setwd("/scratch/alexander/distbayesianmc_logit/")
iters <- 20
vec_datasets <- c("flights_complex1", "flights_complex2")#, )#, "higgs1_full", "higgs2_full")
#vec_datasets <- c("higgs1_large", "higgs2_large")#, "higgs1_full", "higgs2_full")
#vec_datasets <- c("pima")
vec_types_splits_exact <- c("random", "strat_y_cluster")
vec_types_splits_approx <- c("random")
# multiple simulations
#vec_splits <- c(1,2,3,5,10,20)
vec_splits <- c(10,20,50)#,100)
df_logistic <- f_combine_const_data_in_frame(vec_splits, vec_datasets, vec_types_splits_approx, iters, type_sim = "stan_")
df_logistic_exact <- f_combine_const_data_in_frame(vec_splits, vec_datasets, vec_types_splits_exact, iters, type_sim = "")
df_logistic_reduced <- df_logistic %>% select(normconstcombined, splits, dataset, typesplit)
df_logistic_exact_reduced <- df_logistic_exact %>% select(normconstcombined, splits, dataset, typesplit)
df_logistic_exact_reduced %<>% mutate(model = replace(dataset, dataset=="flights_complex1", "cond")) %>% mutate(model = replace(model, model=="flights_complex2", "2 cond"))
df_logistic_exact_reduced %<>% mutate(model_strat = replace(typesplit, typesplit=="strat_y_cluster", "stratified")) %>% mutate(model_strat = paste(model_strat, model, sep= " "))
df_logistic_reduced %<>% mutate(model = replace(dataset, dataset=="flights_complex1", "approx")) %>% mutate(model = replace(model, model=="flights_complex2", "2 approx"))
df_logistic_reduced %<>% mutate(model_strat = replace(typesplit, typesplit=="strat_y_cluster", "stratified")) %>% mutate(model_strat = paste(model_strat, model, sep= " "))
df_all <- rbind(df_logistic_reduced, df_logistic_exact_reduced)
library(ggplot2)
p1 <- ggplot(df_all %>% filter(splits != "100", typesplit == "random") , aes_string(x="splits", y="normconstcombined", fill="model")) +
geom_boxplot() + theme_minimal() + labs(fill = "", y = "log evidence", title="Estimated evidence flights data, both models") +
theme(plot.title = element_text(hjust = 0.5, size=22),
text = element_text(size=22),
axis.title.x = element_text(size=22),
axis.title.y = element_text(size=22),
legend.position="none", # "bottom",
) + scale_fill_manual(values=c("#0fe600", "#E69F00", "#56B4E9", "#E600B0")) +
scale_x_discrete(labels=c("10 \n(32,734)", "20 \n(16,367)", "50 \n(6,547)") )+
theme(axis.line = element_line(colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank()) + scale_y_continuous(labels = scientific) +
geom_vline(xintercept = c(1.5, 2.5)) +#guides(fill=guide_legend(nrow=2,byrow=TRUE)) +
annotate("text", x = 0.72, y = -147000, label = "1a", size= 5) + annotate("text", x = 0.89, y = -147000, label = "1c", size= 5) +
annotate("text", x = 1.07, y = -146500, label = "2a", size= 5) + annotate("text", x = 1.27, y = -147000, label = "2c", size= 5) +
annotate("text", x = 1.72, y = -147000, label = "1a", size= 5) + annotate("text", x = 1.89, y = -147000, label = "1c", size= 5) +
annotate("text", x = 2.07, y = -146500, label = "2a", size= 5) + annotate("text", x = 2.27, y = -148000, label = "2c", size= 5) +
annotate("text", x = 2.72, y = -147000, label = "1a", size= 5) + annotate("text", x = 2.89, y = -147500, label = "1c", size= 5) +
annotate("text", x = 3.07, y = -146500, label = "2a", size= 5) + annotate("text", x = 3.27, y = -153000, label = "2c", size= 5)
ggsave("flightsdata_comp.pdf", plot = p1, width = 12, height = 6)
p1
p2 <- ggplot(df_all %>% filter(model != "2 exact", typesplit == "random") %>% filter(splits != "100") , aes_string(x="splits", y="normconstcombined", fill="model")) +
geom_boxplot() + theme_light() + labs(title="Zoom")+
theme(plot.title = element_text(hjust = 0.5, size=16),
text = element_text(size=18),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
legend.position = "none"
) + scale_fill_manual(values=c("#0fe600", "#E69F00", "#56B4E9")) +
scale_x_discrete(labels=c("10 \n(32,734)", "20 \n(16,367)", "50 \n(6,547)") )+
theme(axis.line = element_line(colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank()) +
geom_vline(xintercept = c(1.5, 2.5))
p2
p3 <- ggplot(df_all %>% filter(model != "2 approx", model != "2 cond") , aes_string(x="splits", y="normconstcombined", fill="model_strat")) +
geom_boxplot() + theme_minimal() + labs(fill = "", y = "log evidence", title="Estimated evidence flights data") +
theme(plot.title = element_text(hjust = 0.5, size=22),
text = element_text(size=22),
axis.title.x = element_text(size=22),
axis.title.y = element_text(size=22),
legend.position="bottom",
) + scale_fill_manual(values=c("#0fe600", "#E69F00", "#56B4E9", "#E600B0")) +
scale_x_discrete(labels=c("10 \n(32,734)", "20 \n(16,367)", "50 \n(6,547)") )+
theme(axis.line = element_line(colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank()) +
geom_vline(xintercept = c(1.5, 2.5)) +guides(fill=guide_legend(nrow=1,byrow=TRUE)) + geom_hline(aes(yintercept=-147548), colour="#990000")
ggsave("flightsdata_m1.pdf", plot = p3, width = 12, height = 6)
p3
library(ggpubr)
figure <- ggarrange(p1, p3,
#labels = c("A", "B"),
ncol = 2)
ggsave("flightsdata.pdf", plot = figure, width = 10, height = 7)
g2 <- ggplotGrob(p2)
p3 = p1 + annotation_custom(grob = g2, xmin = 0.5, xmax = 4, ymin=-190000, ymax=-162000)
ggsave("flightsdata.pdf", plot = p3, width = 7, height = 4)
plot(p3)
# calculate normalising constant using IS
dataset_loaded <- f_dataset_loader("flights_complex2")
splitted_data <- f_pack_split_data(dataset_loaded$X, dataset_loaded$y, ssplits=1, iseed=1, typesplit="random")
splitted_data <- f_prep_prior_logistic(splitted_data, scale = 1)
negf_posterior <- function(x, params.=params) -f_loglik(x, params = params.)-f_logprior(x, params = params.)
resoptim <- optim(splitted_data[[1]]$bprior, negf_posterior, params=splitted_data[[1]], control= c(maxit=10000), hessian=TRUE)
covarproxy <- 2*diag(diag(solve(resoptim$hessian)))
muproxy <- resoptim$par
samples_is <- rmvnorm(n= 200, mean= muproxy, sigma=covarproxy)
logweights <- f_loglik(samples_is, params = splitted_data[[1]])+f_logprior(samples_is, params = splitted_data[[1]]) - dmvnorm(samples_is, mean=muproxy, sigma=covarproxy, log=TRUE)
normconstis <- log_sum_exp(logweights)-log(1000)
#normconstis <- log(mean(exp(logweights)))
df_all %>% mutate(sqerror = (normconstcombined - normconstis)**2) %>% group_by(dataset, model, splits, model_strat) %>% summarise(MSE = mean(sqerror)**0.5/mean(normconstcombined)*100)
library(gridExtra)
library(grid)
t1 = arrangeGrob(g1,ncol=1, left = textGrob("A", y = 1, vjust=1, gp=gpar(fontsize=20)))
t2 = arrangeGrob(g2,ncol=1, left = textGrob("B", y = 1, vjust=1, gp=gpar(fontsize=20)))
final = arrangeGrob(t1,t2, layout_matrix = cbind(c(1,2), c(3,3)))
grid.arrange(final)
setwd("~/R_programming/distbayesianmc")
source("~/R_programming/distbayesianmc/params_simulation/params_probit.R")
setwd("./sim_results/probit/")
df_probit <- f_combine_const_data_in_frame(vec_splits, vec_datasets, vec_types_splits, iters)
library(dplyr)
df_logistic$model <- "logit"
df_probit$model <- "probit"
df_logistic_exact$model <- "logit_ex"
df_logistic_reduced <- df_logistic %>% select(normconstcombined, model, splits, iter)
df_probit_reduced <-df_probit %>% select(normconstcombined, model, splits, iter)
df_logistic_exact <- df_logistic_exact %>% select(normconstcombined, model, splits, iter)
df_all <- rbind(df_logistic_reduced, df_probit_reduced, df_logistic_exact)
df_all %<>% mutate(splitsnew = as.character(splits))
library(ggplot2)
p1 <- ggplot(df_all %>% filter(splitsnew != "20"), aes_string(x="splits", y="normconstcombined", fill="model")) +
geom_boxplot() + scale_x_discrete(limits=as.character(sort(as.numeric(levels(df$splits))))) + theme_minimal()# + theme_gray()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.