library(dplyr) library(mclust) library(replicatebecs)
Playing around with Gaussian mixture models in mclust
.
As I understand it, this should run a GMM with 1:15
modes on a vector of data, allowing for unequal variances (modelNames = "V"
; use modelNames = "E"
for equal variances) and calculate BIC for each one, allowing for model selection:
mclust::Mclust(data, G = c(1:15), modelNames = "V", prior = NULL, control = emControl(), initialization = NULL, warn = mclust.options("warn"), x = NULL, verbose = interactive())
To test this out I'm going to generate some toy data with a known number of modes by sticking together a community.
set.seed(326) nspecies <- 5 species_data <- data.frame(species_id = c(1:nspecies), species_mean_mass = runif(n = nspecies, min = 10, max = 150), species_abundance = ceiling(runif(n = nspecies, min = 20, max = 200))) %>% dplyr::mutate(species_variance = .01 * species_mean_mass) map_abund <- function(X, abund_column) { return(rep(X, times = abund_column[X])) } toy_isd <- sapply(X = 1:nspecies, FUN = map_abund, abund_column = species_data$species_abundance) %>% unlist() %>% as.data.frame() %>% dplyr::rename(species_id = '.') %>% dplyr::left_join(species_data, by = 'species_id') %>% dplyr::mutate(individual_mass = rnorm(n = sum(species_data$species_abundance), mean = species_mean_mass, sd = species_variance))%>% dplyr::select(species_id, individual_mass) %>% dplyr::rename(individual_sizes = individual_mass) %>% replicatebecs::add_energy_sizeclass() %>% dplyr::mutate(ln_size = log(individual_sizes), ln_energy = log(individual_energy)) toy_bsed <- toy_isd %>% replicatebecs::make_bsed() plot_bsed(toy_bsed) isd_plot <- ggplot2::ggplot(data = toy_isd, ggplot2::aes(toy_isd$ln_size, xmin = 0, xmax = 1)) + ggplot2::geom_histogram(data = toy_isd, stat = "bin", binwidth = 0.01, show.legend = NA, inherit.aes = TRUE) + ggplot2::labs(x = "ln(size)", y = "Number of individuals", title = "Individuals size distribution") + ggplot2::theme_bw() ied_plot <- ggplot2::ggplot(data = toy_isd, ggplot2::aes(toy_isd$ln_energy, xmin = 0, xmax = 1)) + ggplot2::geom_histogram(data = toy_isd, stat = "bin", binwidth = 0.01, show.legend = NA, inherit.aes = TRUE) + ggplot2::labs(x = "ln(energy)", y = "Number of individuals", title = "Individuals energy distribution") + ggplot2::theme_bw() isd_plot ied_plot
MCLUST on ISD...
gmm_isd <- mclust::Mclust(toy_isd$ln_size, G = 1:15, modelNames = "V", prior = NULL, control = emControl(), initialization = NULL, warn = mclust.options("warn"), x = NULL, verbose = interactive()) gmm_isd gmm_isd$BIC gmm_isd$parameters$mean print(plot(gmm_isd, what = c("density")))
Trying it on real data...
andrews <- replicatebecs::load_paper_data()[[1]] %>% replicatebecs::add_energy_sizeclass() %>% dplyr::mutate(ln_size = log(individual_sizes), ln_energy = log(individual_energy)) andrews_isd_plot <- ggplot2::ggplot(data = andrews, ggplot2::aes(andrews$ln_size, xmin = 0, xmax = 1)) + ggplot2::geom_histogram(data = andrews, stat = "bin", binwidth = 0.01, show.legend = NA, inherit.aes = TRUE) + ggplot2::labs(x = "ln(size)", y = "Number of individuals", title = "Individuals size distribution") + ggplot2::theme_bw() andrews_isd_plot gmm_andrews <- mclust::Mclust(andrews$ln_size, G = 1:15, modelNames = "V", prior = NULL, control = emControl(), initialization = NULL, warn = mclust.options("warn"), x = NULL, verbose = interactive()) gmm_andrews gmm_andrews$BIC gmm_andrews$parameters$mean print(plot(gmm_andrews, what = c("density")))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.