inst/doc/introduction.R

## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(fig.width = 5, fig.height = 5)

## -----------------------------------------------------------------------------
library(ggplot2)

library(disclapmix)
data(danes)

## -----------------------------------------------------------------------------
db <- as.matrix(danes[rep(seq_len(nrow(danes)), danes$n), seq_len(ncol(danes)-1)])
str(db)

## -----------------------------------------------------------------------------
fits <- disclapmix_adaptive(x = db)

## -----------------------------------------------------------------------------
BICs <- sapply(fits, function(x) x$BIC_marginal)
bestfit <- fits[[which.min(BICs)]]
bestfit

## -----------------------------------------------------------------------------
clusters <- seq_along(fits) # or: sapply(fits, function(x) nrow(x$y))
plot(clusters, BICs, type = "b")
points(nrow(bestfit$y), bestfit$BIC_marginal, pch = 4, cex = 4, col = "red")

## -----------------------------------------------------------------------------
db_haplotypes <- as.matrix(danes[, seq_len(ncol(db))])
p <- predict(bestfit, db_haplotypes)
plot(log10(danes$n / sum(danes$n)), log10(p), 
     xlab = "Observed relative frequency", 
     ylab = "Discrete Laplace estimated probability")
abline(0, 1)

## ---- eval=FALSE--------------------------------------------------------------
#  fit <- disclapmix(x = db, clusters = 2L)

## -----------------------------------------------------------------------------
clusters <- 1L:5L
fits <- lapply(clusters, function(clusters) {
  fit <- disclapmix(x = db, clusters = clusters)
  return(fit)
})

marginalBICs <- sapply(fits, function(fit) {
  fit$BIC_marginal
})

bestfit <- fits[[which.min(marginalBICs)]]

## -----------------------------------------------------------------------------
bestfit
summary(bestfit)

## ---- fig.width=10------------------------------------------------------------
plot(bestfit)

## -----------------------------------------------------------------------------
bestfit$y
bestfit$disclap_parameters

## -----------------------------------------------------------------------------
disclap_estimates <- predict(bestfit, 
                             newdata = as.matrix(danes[, 1:(ncol(danes) - 1)]))

## -----------------------------------------------------------------------------
ggplot() +
  geom_abline(intercept = 0, slope = 1) +
  geom_point(aes(x = danes$n/sum(danes$n), y = disclap_estimates)) +
  labs(x = "Observed frequency",
       y = "Predicted frequency (discrete Laplace)") +
  theme_bw() +
  scale_x_log10() +
  scale_y_log10()

Try the disclapmix package in your browser

Any scripts or data that you put into this service are public.

disclapmix documentation built on June 29, 2022, 5:06 p.m.