library(dplyr)
library(mclust)
library(replicatebecs)

Playing around with Gaussian mixture models in mclust.

As I understand it, this should run a GMM with 1:15 modes on a vector of data, allowing for unequal variances (modelNames = "V"; use modelNames = "E" for equal variances) and calculate BIC for each one, allowing for model selection:

mclust::Mclust(data, G = c(1:15), modelNames = "V", 
     prior = NULL, 
     control = emControl(), 
     initialization = NULL, 
     warn = mclust.options("warn"), 
     x =  NULL, 
     verbose = interactive())

To test this out I'm going to generate some toy data with a known number of modes by sticking together a community.

set.seed(326)
nspecies <- 5

species_data <- data.frame(species_id = c(1:nspecies), 
                           species_mean_mass = runif(n = nspecies,
                                                     min = 10, max = 150), 
                           species_abundance = ceiling(runif(n = nspecies, 
                                                     min = 20, max = 200))) %>%
  dplyr::mutate(species_variance = .01 * species_mean_mass)

map_abund <- function(X, abund_column) {
  return(rep(X, times = abund_column[X]))
}

toy_isd <- sapply(X = 1:nspecies, FUN = map_abund, abund_column = species_data$species_abundance) %>%
  unlist() %>%
  as.data.frame() %>%
  dplyr::rename(species_id = '.') %>%
  dplyr::left_join(species_data, by = 'species_id') %>%
  dplyr::mutate(individual_mass = rnorm(n = sum(species_data$species_abundance), mean = species_mean_mass,
                                        sd = species_variance))%>%
  dplyr::select(species_id, individual_mass) %>%
  dplyr::rename(individual_sizes = individual_mass) %>%
  replicatebecs::add_energy_sizeclass() %>%
  dplyr::mutate(ln_size = log(individual_sizes), ln_energy = log(individual_energy))

toy_bsed <- toy_isd %>%
  replicatebecs::make_bsed()

plot_bsed(toy_bsed)


isd_plot <- ggplot2::ggplot(data = toy_isd, ggplot2::aes(toy_isd$ln_size, xmin = 0, xmax = 1)) +
    ggplot2::geom_histogram(data = toy_isd, stat = "bin", 
                            binwidth = 0.01,
                            show.legend = NA,
                            inherit.aes = TRUE)  +
    ggplot2::labs(x = "ln(size)", y = "Number of individuals", title = "Individuals size distribution") +
    ggplot2::theme_bw()

ied_plot <- ggplot2::ggplot(data = toy_isd, ggplot2::aes(toy_isd$ln_energy, xmin = 0, xmax = 1)) +
    ggplot2::geom_histogram(data = toy_isd, stat = "bin", 
                            binwidth = 0.01,
                            show.legend = NA,
                            inherit.aes = TRUE) +
    ggplot2::labs(x = "ln(energy)", y = "Number of individuals", title = "Individuals energy distribution") +
    ggplot2::theme_bw()

isd_plot
ied_plot

MCLUST on ISD...

gmm_isd <- mclust::Mclust(toy_isd$ln_size, G = 1:15, modelNames = "V", 
     prior = NULL, 
     control = emControl(), 
     initialization = NULL, 
     warn = mclust.options("warn"), 
     x =  NULL, 
     verbose = interactive())

gmm_isd

gmm_isd$BIC

gmm_isd$parameters$mean

print(plot(gmm_isd, what = c("density")))

Trying it on real data...

andrews <- replicatebecs::load_paper_data()[[1]] %>%
  replicatebecs::add_energy_sizeclass() %>%
  dplyr::mutate(ln_size = log(individual_sizes), ln_energy = log(individual_energy))


andrews_isd_plot <- ggplot2::ggplot(data = andrews, ggplot2::aes(andrews$ln_size, xmin = 0, xmax = 1)) +
    ggplot2::geom_histogram(data = andrews, stat = "bin", 
                            binwidth = 0.01,
                            show.legend = NA,
                            inherit.aes = TRUE)  +
    ggplot2::labs(x = "ln(size)", y = "Number of individuals", title = "Individuals size distribution") +
    ggplot2::theme_bw()

andrews_isd_plot

gmm_andrews <- mclust::Mclust(andrews$ln_size, G = 1:15, modelNames = "V", 
     prior = NULL, 
     control = emControl(), 
     initialization = NULL, 
     warn = mclust.options("warn"), 
     x =  NULL, 
     verbose = interactive())

gmm_andrews

gmm_andrews$BIC

gmm_andrews$parameters$mean

print(plot(gmm_andrews, what = c("density")))


diazrenata/neonbecs documentation built on July 6, 2019, 9:25 a.m.