knitr::opts_chunk$set(echo = TRUE) library(MATSS) library(matssldats) library(drake) library(ggplot2) library(cowplot) library(magick)
# define where the cache is located db <- DBI::dbConnect(RSQLite::SQLite(), here::here("drake", "drake-cache.sqlite")) cache <- storr::storr_dbi("datatable", "keystable", db) ts_results <- readd(ts_results, cache = cache) selected_ts_results <- readd(ts_select_results, cache = cache)
Find TS models that threw errors while running and remove them:
ts_errors <- NULL for (i in seq(ts_results)){ if(!is.list(ts_results[[i]])) { print(names(ts_results)[i]) print(ts_results[[i]]) ts_errors <- c(ts_errors, i) } } ts_results <- ts_results[seq(ts_results)[which(!(seq(ts_results) %in% ts_errors))]]
These TS models ran successfully:
# Remaining ts models: print(names(ts_results))
Find TS models that threw errors in selection and remove them:
ts_select_errors <- NULL for (i in seq(selected_ts_results)){ if(!is.list(selected_ts_results[[i]])) { print(names(selected_ts_results)[i]) print(selected_ts_results[[i]]) ts_select_errors <- c(ts_select_errors, i) } } selected_ts_results <- selected_ts_results[seq(selected_ts_results)[which(!(seq(selected_ts_results) %in% ts_select_errors))]]
These TS models were selected correctly:
# Remaining ts models: print(names(selected_ts_results))
lda_ts_result_summary <- readd(lda_ts_result_summary, cache = cache) lda_ts_result_summary
plot(lda_ts_result_summary$ntopics, lda_ts_result_summary$nchangepoints, main = 'Number of changepoints by number of LDA topics', xlab = 'Number of LDA topics', ylab = 'Number of changepoints') plot(lda_ts_result_summary$ntimesteps, lda_ts_result_summary$nchangepoints, main = 'Number of changepoints by length of timeseries', xlab = 'Length of timeseries (number of timesteps)', ylab = 'Number of changepoints')
#ts_models_summary <- readd(ts_models_summary, cache = cache) ts_results <- readd(ts_results, cache =cache) ts_models_summary <- collect_ts_result_models_summary(ts_results) nb_close_summary <- ts_models_summary %>% dplyr::mutate(AIC = as.numeric(AIC)) %>% dplyr::group_by(data_name, filtered_topics) %>% dplyr::mutate(min_AIC = min(AIC)) %>% dplyr::mutate(delta_AIC = AIC - min_AIC) %>% dplyr::mutate(is_close = abs(delta_AIC) <= 2) %>% dplyr::summarize(nb_close = sum(is_close)) %>% dplyr::ungroup() library(ggplot2) ts_aic_plot <- ggplot(data = ts_models_summary, aes(x = filtered_topics, y = AIC, color = cpts_formula)) + geom_point() + ylab("AIC") + scale_y_discrete(labels = NULL) + theme_bw() + facet_wrap(facets = data_name ~ .) print(ts_aic_plot) nb_close_plot <- ggplot(data = nb_close_summary, aes(x = filtered_topics, y = nb_close)) + geom_jitter(height = 0, width = .05) + theme(legend.position = "none") + theme_bw() + facet_wrap(facets = data_name ~ .) print(nb_close_plot)
lda_ts_result_summary$filtered_topics <- paste(lda_ts_result_summary$filtered, lda_ts_result_summary$topics, sep= "_") ncpts_lot <- ggplot(data = lda_ts_result_summary, aes(x = maxtopics, y = nchangepoints, color = gen_formula)) + geom_jitter(height = 0) + theme(legend.position = "none") + theme_bw() + facet_wrap(facets = filtered ~ .) ncpts_lot
lda_results <- readd(lda_results, cache = cache) lda_ts_result_summary <- lda_ts_result_summary %>% dplyr::filter(data_name != "sgs_data") %>% dplyr::filter(data_name != "jornada_data") %>% dplyr::filter(data_name != "maizuru_data") %>% dplyr::arrange(dplyr::desc(data_name), dplyr::desc(filtered), dplyr::desc(maxtopics), dplyr::desc(gen_formula), dplyr::desc(nchangepoints)) dir.create(here::here("analysis", "reports", "lda_ts_plots")) for(i in 1:nrow(lda_ts_result_summary)) { if(is.na(lda_ts_result_summary$ts_name[i])) { next } if(!dir.exists(here::here("analysis", "reports", "lda_ts_plots", lda_ts_result_summary$ts_name[i]))) { dir.create(here::here("analysis", "reports", "lda_ts_plots", lda_ts_result_summary$ts_name[i])) } this_ts <- selected_ts_results [[ which(names(selected_ts_results) == lda_ts_result_summary$ts_name[i])]] this_lda <- lda_results [[ which(names(lda_results) == lda_ts_result_summary$lda_name[i])]][[1]] lda_cols <- LDATS::set_LDA_plot_colors(this_lda) jpeg(here::here("analysis", "reports", "lda_ts_plots", lda_ts_result_summary$ts_name[i], "lda.jpg")) plot(this_lda, cols = lda_cols) dev.off() jpeg(here::here("analysis", "reports", "lda_ts_plots", lda_ts_result_summary$ts_name[i], "ts.jpg")) ts_cols <- LDATS::set_gamma_colors(this_ts, cols = lda_cols) LDATS::pred_gamma_TS_plot(this_ts, cols = ts_cols, xlab = "Time") dev.off() p1 <- ggdraw() + draw_image(here::here("analysis", "reports", "lda_ts_plots", lda_ts_result_summary$ts_name[i], "lda.jpg")) + draw_figure_label(label = paste(lda_ts_result_summary$data_name[i], lda_ts_result_summary$filtered[i]), position = "top.left") p2 <- ggdraw()+ draw_image(here::here("analysis", "reports", "lda_ts_plots", lda_ts_result_summary$ts_name[i], "ts.jpg")) + draw_figure_label(label = paste("max", lda_ts_result_summary$maxtopics[i], "topics,", lda_ts_result_summary$nchangepoints[i], "cpts,", lda_ts_result_summary$gen_formula[i]), position = "top.left") gridExtra::grid.arrange(grobs = list(p1, p2), nrow = 1, ncol =2 ) }
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.