# Set the knitr chunk default `echo = TRUE` for the chunks in this report.
knitr::opts_chunk$set(echo = TRUE)

library(MATSS)
library(drake)
library(matssldats)

Read in the results

# Open the drake cache, which is stored as an SQLite database file inside the
# project's "drake" directory, and wrap the connection in a storr object.
cache_path <- here::here("drake", "drake-cache.sqlite")
db <- DBI::dbConnect(RSQLite::SQLite(), cache_path)
cache <- storr::storr_dbi("datatable", "keystable", db)

# Read the cached targets used by the rest of this report.
lda_results <- drake::readd(lda_results, cache = cache)
lda_ts_result_summary <- drake::readd(lda_ts_result_summary, cache = cache)

Errors

Find LDAs that threw errors and remove them:

# Flag every entry of `lda_results` that is not a list; those are the LDAs
# that threw errors. All entries are flagged up front so nothing is removed
# while we are still iterating: deleting inside the loop shifts the remaining
# elements, which skips the entry that follows each failure and eventually
# indexes past the end of the shortened list.
lda_failed <- !vapply(lda_results, is.list, logical(1))

# Report the name and contents of each failed entry.
for (i in which(lda_failed)) {
    print(names(lda_results)[i])
    print(lda_results[[i]])
}

# Drop all failed entries in a single step.
lda_results <- lda_results[!lda_failed]

Plot LDAs

lda_result_summary <- readd(lda_result_summary, cache = cache)
datasets <- unique(lda_result_summary$data_name)

# Hand-picked positions within `datasets` of the datasets to plot. Positions
# beyond the end of `datasets` are dropped rather than producing NA names.
dataset_positions <- intersect(c(1, 8, 11, 13), seq_along(datasets))

for (i in dataset_positions) {

    this_name <- datasets[i]

    # All model summaries for this dataset, then one subset per combination of
    # filtering ("filtered" / "complete") and maximum number of topics (5 / 16).
    these_models <- dplyr::filter(lda_result_summary,
                                  data_name == this_name)
    filtered_5 <- dplyr::filter(these_models,
                                filtered == "filtered",
                                maxtopics == 5)
    filtered_16 <- dplyr::filter(these_models,
                                 filtered == "filtered",
                                 maxtopics == 16)
    complete_5 <- dplyr::filter(these_models,
                                filtered == "complete",
                                maxtopics == 5)
    complete_16 <- dplyr::filter(these_models,
                                 filtered == "complete",
                                 maxtopics == 16)

    # Names of the LDAs to plot, in display order.
    wanted <- c(
        as.character(filtered_5$lda_name),
        as.character(filtered_16$lda_name),
        as.character(complete_5$lda_name),
        as.character(complete_16$lda_name)
    )

    # Locate each wanted LDA in `lda_results`. `match()` copes with a subset
    # that has zero or several rows (where `==` would recycle or return
    # nothing), and LDAs that are no longer in `lda_results` (e.g. removed
    # because they errored) come back as NA and are dropped.
    indices <- match(wanted, names(lda_results))
    indices <- indices[!is.na(indices)]

    # Plot whatever was found, rather than assuming exactly four results.
    for (j in indices) {
        # The first element of each stored result is what gets plotted.
        LDATS:::plot.LDA_VEM(lda_results[[j]][1][[1]], xname = 'Time step')
        title(main = names(lda_results)[j])
    }

}
# 
# for (i in 1:min(15, length(lda_results)))
# {
#     print(names(lda_results)[i])
#     
#     # if (names(lda_results)[i] == "lda__data")
#     # {
#     #     LDATS:::plot.LDA_VEM(lda_results[[i]][1][[1]], 
#     #                          xtime = drake::readd(_data, cache = cache)$censusdate,
#     #                          xname = 'Year')
#     #     title(main = names(lda_results)[i])
#     # } else {
#    
#     #  }
#     
# }

Summarize LDA results

# Display the `lda_ts_result_summary` target that was read from the drake cache.
lda_ts_result_summary
# 
# plot(lda_ts_result_summary$ntimeseries, lda_ts_result_summary$ntopics, 
#      main = 'Number of topics by number of timeseries', 
#      xlab = 'Number of timeseries', ylab = 'Number of topics')
# 
# plot(lda_ts_result_summary$ntimesteps, lda_ts_result_summary$ntopics, 
#      main = 'Number of topics by length of timeseries', 
#      xlab = 'Length of timeseries (number of timesteps)', ylab = 'Number of topics')

library(ggplot2)

# Number of topics selected (`ntopics`) against the maximum allowed
# (`maxtopics`), coloured by `filtered`, with one panel per dataset. Points are
# jittered horizontally only, so the y values stay exact.
ntopics_plot <- ggplot(data = lda_result_summary,
                       aes(x = maxtopics, y = ntopics, color = filtered)) +
    geom_jitter(height = 0, width = 2) +
    scale_y_continuous(breaks = c(1:16), limits = c(0, 20)) +
    scale_x_continuous(breaks = c(5, 16)) +
    # The complete theme must come first: adding `theme_bw()` after `theme()`
    # replaces the earlier settings and would bring the legend back.
    theme_bw() +
    theme(legend.position = "none") +
    facet_wrap(facets = data_name ~ .)
print(ntopics_plot)


weecology/MATSS-LDATS documentation built on Nov. 5, 2019, 12:07 p.m.