knitr::opts_chunk$set(echo = TRUE)

library(MATSS)
library(matssldats)
library(drake)


library(ggplot2) 
library(cowplot)
library(magick)

Read in the results

# define where the cache is located
db <- DBI::dbConnect(RSQLite::SQLite(), here::here("drake", "drake-cache.sqlite"))
cache <- storr::storr_dbi("datatable", "keystable", db)

ts_results <- readd(ts_results, cache = cache)

selected_ts_results <- readd(ts_select_results, cache = cache)

Errors

Find TS models that threw errors while running and remove them:

ts_errors <- NULL
for (i in seq(ts_results)){
    if(!is.list(ts_results[[i]])) {
        print(names(ts_results)[i])
        print(ts_results[[i]])
        ts_errors <- c(ts_errors, i)
    }
}
ts_results <- ts_results[seq(ts_results)[which(!(seq(ts_results) %in% ts_errors))]]

These TS models ran successfully:

# Remaining ts models:
print(names(ts_results))

Find TS models that threw errors in selection and remove them:

ts_select_errors <- NULL
for (i in seq(selected_ts_results)){
    if(!is.list(selected_ts_results[[i]])) {
        print(names(selected_ts_results)[i])
        print(selected_ts_results[[i]])
        ts_select_errors <- c(ts_select_errors, i)
    }
}
selected_ts_results <- selected_ts_results[seq(selected_ts_results)[which(!(seq(selected_ts_results) %in% ts_select_errors))]]

These TS models were selected correctly:

# Remaining ts models:
print(names(selected_ts_results))

Community-level results

lda_ts_result_summary <- readd(lda_ts_result_summary, cache = cache)
lda_ts_result_summary

Cross-community results

plot(lda_ts_result_summary$ntopics, lda_ts_result_summary$nchangepoints, 
     main = 'Number of changepoints by number of LDA topics', 
     xlab = 'Number of LDA topics', ylab = 'Number of changepoints')

plot(lda_ts_result_summary$ntimesteps, lda_ts_result_summary$nchangepoints, 
     main = 'Number of changepoints by length of timeseries', 
     xlab = 'Length of timeseries (number of timesteps)', ylab = 'Number of changepoints')

Detailed model results

#ts_models_summary <- readd(ts_models_summary, cache = cache)

ts_results <- readd(ts_results, cache =cache)
ts_models_summary <- collect_ts_result_models_summary(ts_results)

nb_close_summary <- ts_models_summary %>%
    dplyr::mutate(AIC = as.numeric(AIC)) %>%
    dplyr::group_by(data_name, filtered_topics) %>%
    dplyr::mutate(min_AIC = min(AIC)) %>%
    dplyr::mutate(delta_AIC = AIC - min_AIC) %>%
    dplyr::mutate(is_close = abs(delta_AIC) <= 2) %>%
    dplyr::summarize(nb_close = sum(is_close)) %>%
    dplyr::ungroup()

library(ggplot2)

ts_aic_plot <- ggplot(data = ts_models_summary, aes(x = filtered_topics,
                                                    y = AIC,
                                                    color = cpts_formula)) +
    geom_point() +
    ylab("AIC") +
    scale_y_discrete(labels = NULL) +
    theme_bw() +
    facet_wrap(facets = data_name ~ .)


print(ts_aic_plot)

nb_close_plot <- ggplot(data = nb_close_summary, aes(x = filtered_topics,
                                                     y = nb_close)) +
    geom_jitter(height = 0, width = .05) +
    theme(legend.position = "none")  +
    theme_bw() +
    facet_wrap(facets = data_name ~ .)
print(nb_close_plot)
lda_ts_result_summary$filtered_topics <- paste(lda_ts_result_summary$filtered, 
                                               lda_ts_result_summary$topics,
                                               sep= "_")

ncpts_lot <- ggplot(data = lda_ts_result_summary, aes(x = maxtopics, y = nchangepoints, color = gen_formula)) +
    geom_jitter(height = 0) +
    theme(legend.position = "none")  +
    theme_bw() +
    facet_wrap(facets = filtered ~ .)
ncpts_lot
lda_results <- readd(lda_results, cache = cache)
lda_ts_result_summary <- lda_ts_result_summary %>%
    dplyr::filter(data_name != "sgs_data") %>%
    dplyr::filter(data_name != "jornada_data") %>%
    dplyr::filter(data_name != "maizuru_data") %>%
    dplyr::arrange(dplyr::desc(data_name), dplyr::desc(filtered),
                   dplyr::desc(maxtopics),
                   dplyr::desc(gen_formula), dplyr::desc(nchangepoints))


dir.create(here::here("analysis", "reports", "lda_ts_plots"))

for(i in 1:nrow(lda_ts_result_summary)) {
    if(is.na(lda_ts_result_summary$ts_name[i])) {
        next
    }

    if(!dir.exists(here::here("analysis", "reports", "lda_ts_plots", lda_ts_result_summary$ts_name[i]))) {
        dir.create(here::here("analysis", "reports", "lda_ts_plots", lda_ts_result_summary$ts_name[i]))
    }

    this_ts <- selected_ts_results [[ which(names(selected_ts_results) == lda_ts_result_summary$ts_name[i])]]
    this_lda <- lda_results [[ which(names(lda_results) == lda_ts_result_summary$lda_name[i])]][[1]]

    lda_cols <- LDATS::set_LDA_plot_colors(this_lda)
    jpeg(here::here("analysis", "reports", "lda_ts_plots", lda_ts_result_summary$ts_name[i], "lda.jpg"))
    plot(this_lda, cols = lda_cols)
    dev.off()
    jpeg(here::here("analysis", "reports", "lda_ts_plots", lda_ts_result_summary$ts_name[i], "ts.jpg"))
    ts_cols <- LDATS::set_gamma_colors(this_ts, cols = lda_cols)
    LDATS::pred_gamma_TS_plot(this_ts, cols = ts_cols, xlab = "Time")
    dev.off()

    p1 <- ggdraw() + draw_image(here::here("analysis", "reports", "lda_ts_plots", lda_ts_result_summary$ts_name[i], "lda.jpg")) +
        draw_figure_label(label = paste(lda_ts_result_summary$data_name[i],
                                        lda_ts_result_summary$filtered[i]), position = "top.left")
    p2 <- ggdraw()+ draw_image(here::here("analysis", "reports", "lda_ts_plots", lda_ts_result_summary$ts_name[i], "ts.jpg")) + 
        draw_figure_label(label = paste("max",
                                        lda_ts_result_summary$maxtopics[i],
                                        "topics,",
                                        lda_ts_result_summary$nchangepoints[i],
                                        "cpts,",
                                        lda_ts_result_summary$gen_formula[i]),
                          position = "top.left")

    gridExtra::grid.arrange(grobs = list(p1, p2), nrow = 1, ncol =2 )
}


weecology/MATSS-LDATS documentation built on Nov. 5, 2019, 12:07 p.m.