# Echo the source of every chunk in the rendered report.
knitr::opts_chunk$set(echo = TRUE)

# Packages used throughout the report.
library(MATSS)
library(drake)
library(matssldats)
# Location of the SQLite file backing the drake cache, relative to the
# project root as resolved by `here`.
cache_path <- here::here("drake", "drake-cache.sqlite")

# Open the database and wrap it in a storr cache that drake can read from.
db <- DBI::dbConnect(RSQLite::SQLite(), dbname = cache_path)
cache <- storr::storr_dbi(
  tbl_data = "datatable",
  tbl_keys = "keystable",
  con = db
)

# Load the targets used by this report from the cache.
lda_results <- drake::readd(lda_results, cache = cache)
lda_ts_result_summary <- drake::readd(lda_ts_result_summary, cache = cache)
Find LDAs that threw errors and remove them:
# Flag every entry of `lda_results` that is not a list; those are the runs
# that are reported and dropped below (presumably a failed run is stored as
# the error it raised -- TODO confirm against the pipeline).
lda_failed <- !vapply(lda_results, is.list, logical(1))

# Report each failed run: its name followed by the stored object.
for (i in which(lda_failed)) {
  print(names(lda_results)[i])
  print(lda_results[[i]])
}

# Drop all failed runs in one step. Removing elements inside a loop over
# precomputed indices shifts the remaining elements, which skips the entry
# after each removal and eventually indexes past the end of the list.
if (any(lda_failed)) {
  lda_results <- lda_results[!lda_failed]
}
# Summary table of the LDA models, read from the drake cache.
lda_result_summary <- readd(lda_result_summary, cache = cache)
datasets <- unique(lda_result_summary$data_name)

# Hand-picked positions in `datasets` to plot. Positions beyond the end of
# `datasets` are skipped rather than producing an NA dataset name.
datasets_to_plot <- intersect(c(1, 8, 11, 13), seq_along(datasets))

for (i in datasets_to_plot) {
  this_name <- datasets[i]
  these_models <- dplyr::filter(lda_result_summary, data_name == this_name)

  # One selection per combination of filtering mode and maximum topic count.
  filtered_5 <- dplyr::filter(
    these_models, filtered == "filtered", maxtopics == 5
  )
  filtered_16 <- dplyr::filter(
    these_models, filtered == "filtered", maxtopics == 16
  )
  complete_5 <- dplyr::filter(
    these_models, filtered == "complete", maxtopics == 5
  )
  complete_16 <- dplyr::filter(
    these_models, filtered == "complete", maxtopics == 16
  )

  # Positions of the selected models within `lda_results`, in the order
  # listed above. `%in%` copes with a selection that has zero or several
  # rows, where `==` would recycle or fail.
  indices <- c(
    which(names(lda_results) %in% filtered_5$lda_name),
    which(names(lda_results) %in% filtered_16$lda_name),
    which(names(lda_results) %in% complete_5$lda_name),
    which(names(lda_results) %in% complete_16$lda_name)
  )

  # Plot whatever was found. A model may be absent from `lda_results` (for
  # example after failed runs were removed), so the number of matches is not
  # assumed to be four.
  for (j in seq_along(indices)) {
    LDATS:::plot.LDA_VEM(
      lda_results[[indices[j]]][[1]],
      xname = "Time step"
    )
    title(main = names(lda_results)[indices[j]])
  }
}

#
# for (i in 1:min(15, length(lda_results)))
# {
#   print(names(lda_results)[i])
#
#   # if (names(lda_results)[i] == "lda__data")
#   # {
#   #   LDATS:::plot.LDA_VEM(lda_results[[i]][1][[1]],
#   #                        xtime = drake::readd(_data, cache = cache)$censusdate,
#   #                        xname = 'Year')
#   #   title(main = names(lda_results)[i])
#   # } else {
#   #
#   # }
#   #
# }
# Auto-print the `lda_ts_result_summary` object so it appears in the report.
lda_ts_result_summary
#
# plot(lda_ts_result_summary$ntimeseries, lda_ts_result_summary$ntopics,
#      main = 'Number of topics by number of timeseries',
#      xlab = 'Number of timeseries', ylab = 'Number of topics')
#
# plot(lda_ts_result_summary$ntimesteps, lda_ts_result_summary$ntopics,
#      main = 'Number of topics by length of timeseries',
#      xlab = 'Length of timeseries (number of timesteps)', ylab = 'Number of topics')

library(ggplot2)

# Number of topics (`ntopics`) against the maximum number of topics
# (`maxtopics`), coloured by `filtered`, with one panel per dataset.
ntopics_plot <- ggplot(
  data = lda_result_summary,
  aes(x = maxtopics, y = ntopics, color = filtered)
) +
  # Jitter horizontally only, so the topic counts on the y axis stay exact.
  geom_jitter(height = 0, width = 2) +
  scale_y_continuous(breaks = c(1:16), limits = c(0, 20)) +
  scale_x_continuous(breaks = c(5, 16)) +
  # Apply the complete theme first: `theme_bw()` resets every theme element,
  # so a legend setting placed before it would be overridden.
  theme_bw() +
  theme(legend.position = "none") +
  facet_wrap(facets = data_name ~ .)

print(ntopics_plot)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.