#' Random Sampling of 1-dim. Mixtures of Normal Distribution
#'
#' Set sample size n and parameters of MixtNormal(mu,sigma,pi).
#' By definition, dimension of mu, sigma and pi must be the same.
#' In the function, parameter `pi` is normalized.
#' @param n <int scalar> : sample size
#' @param mu <double vector> : population means
#' @param sigma <double vector> : population sds
#' @param ratio <double vector> : mixtured ratio
#' @return The output will be <double vector> of size n that you input.
#'
#' @importFrom dplyr %>%
#' @importFrom purrr pmap_dbl
#' @importFrom stats rnorm
#' @export
#'
random_mixt_normal <- function(n, mu, sigma, ratio){
# 1. Preliminaries and Error Corrections
# record a number of components from dimension of the parameter `mu`
n_components <- length(mu)
ratio <- ratio / sum(ratio)
# sanity check for dimension of parameters
if (length(sigma) != n_components | length(ratio) != n_components){
stop("Error : dimension of mu, sigma and ratio must be the same.")
}
if (prod(sigma > 0) != 1){
stop("Error : sigma is not positive.")
}
# 2. Main Calculation
# decide a component that each point is sampled...
component_idx <- sample(x = 1:n_components, size = n, prob = ratio, replace = TRUE)
# sampling
obj <- list(component_idx = component_idx) %>%
pmap_dbl(.f = function(component_idx){
rnorm(1, mean = mu[component_idx], sd = sigma[component_idx])
})
# 3. output
return(obj)
}
#' Probability Density Function of 1-dim. Mixtures of Normal Distribution
#'
#' Set sample point x and parameters of MixtNormal(mu,sigma,pi).
#' By definition, dimension of mu, sigma and pi must be the same.
#' In the function, parameter `pi` is normalized.
#' @param x <double vector> : sample point
#' @param mu <double vector> : population means
#' @param sigma <double vector> : population sd
#' @param ratio <double vector> : mixtured ratio
#' @return The output is <tibble> that is probability densities of each saple point at each component and mixture distribution.
#'
#' @importFrom dplyr %>%
#' @importFrom dplyr as_tibble
#' @importFrom dplyr mutate
#' @importFrom dplyr select
#' @importFrom dplyr filter
#' @importFrom dplyr everything
#' @importFrom purrr map
#' @importFrom stringr str_c
#' @importFrom stats dnorm
#' @export
#'
density_mixt_normal <- function(x, mu, sigma, ratio){
# 1. Preliminaries and Error Correction
# number of componets calculated by dimension of parameter `mu`
n_components <- length(mu)
ratio <- ratio / sum(ratio)
# sanity check of each parameter's dimension
if (length(sigma) != n_components | length(ratio) != n_components){
stop("Error : dimension of mu, sigma and ratio must be the same.")
}
if (prod(sigma > 0) != 1){
stop("Error : sigma is not positive.")
}
# 2. Main Calculation
# calculation of probability density
# pd_each_component : output matrix is probability densities of each component
# pd : output vector is probability density
pd_each_component <- as.list(x) %>%
map(.f = ~dnorm(., mean = mu, sd = sigma)) %>%
unlist() %>%
matrix(byrow = TRUE, ncol = n_components)
colnames(pd_each_component) <- str_c("component_", 1:n_components)
pd <- as.numeric(pd_each_component %*% ratio)
# 3. output
obj <- as_tibble(pd_each_component)
obj <- obj %>%
mutate(prob_density = pd, x = x) %>%
select(x, prob_density, everything())
return(obj)
}
#' Log Likelihood of 1-dim. Mixtures of Normal Distribution
#'
#' @param x <double vector> : sample point
#' @param mu <double vector> : population means
#' @param sigma <double vector> : population sd
#' @param ratio <double vector> : mixtured ratio
#' @return The output is <double vector> that is loglikelihood of each point of x
#'
#' @importFrom dplyr %>%
#' @export
#'
LL_mixt_normal <- function(x, mu, sigma, ratio){
# 1. Preliminaries and Error Correction
# number of components calculated from dimension of mu
n_components <- length(mu)
# sanity check
if (length(sigma) != n_components | length(ratio) != n_components){
stop("Error : dimension of mu, sigma and ratio must be the same.")
}
if (prod(sigma > 0) != 1){
stop("Error : sigma is not positive.")
}
# 2. Main Calculation
# calculation of log likelihood
obj <- sum(log(density_mixt_normal(x, mu = mu, sigma = sigma, ratio = ratio)$prob_density))
# output
return(obj)
}
#' EM Algorithm for 1-dim. Mixtures of Normal Distribution
#'
#' @param x <double vector> : sample
#' @param max_iter <int scalar> : maximum number of iterations
#' @param tol <double scalar> : tolerance
#' @param init_mu <double vector> : initial values of population means
#' @param init_sigma <double vector> : initial values of population sds
#' @param init_ratio <double vector> : initial values of population ratio
#' @return The output is the set (MLE of params, LL, estimated component of each sample point, n_iter).
#'
#' @importFrom dplyr tibble
#' @importFrom dplyr select
#' @importFrom dplyr bind_rows
#' @importFrom purrr map2_df
#' @importFrom purrr map_chr
#' @importFrom rlang .data
#' @export
#'
EM_mixt_normal <- function(x, max_iter, tol, init_mu, init_sigma, init_ratio){
# 1. Preliminalies and Error Correction
# number of components and sample size
sample_size <- length(x) # sample size
n_components <- length(init_mu)
init_ratio <- init_ratio / sum(init_ratio)
# sanity check for dimension of parameters
if (length(init_sigma) != n_components | length(init_ratio) != n_components){
stop("Error : dimension of mu, sigma and ratio must be the same.")
}
if (prod(init_sigma > 0) != 1){
stop("Error : init_sigma is not positive.")
}
# 2. Main Calculation
# settings of initial values
mu <- init_mu; sigma <- init_sigma; ratio <- init_ratio
# history
LL_history <- LL_mixt_normal(x = x, mu = mu, sigma = sigma, ratio = ratio)
params_history <- tibble(iter = 0,
component = as.character(1:n_components),
mu = mu,
sigma = sigma,
ratio = ratio)
# EM algorithm
for (iter in 1:max_iter) {
# E-step : gammma
likelihood <- density_mixt_normal(x = x, mu = mu, sigma = sigma, ratio = ratio)
gamma_each_component <- as.matrix(
map2_df(.x = likelihood[ , 3:(2+n_components)], .y = ratio, .f = function(x, y){x * y}) /
likelihood$prob_density
)
colnames(gamma_each_component) <- as.character(1:n_components) %>%
map_chr(.f = ~paste0("gamma", .))
sum_gamma <- colSums(gamma_each_component)
# M-step
ratio <- sum_gamma / sample_size # MLE of ratio
mu <- as.vector(x %*% gamma_each_component) / sum_gamma # MLE of mu
ncol <- length(x); nrow <- n_components # MLE of sigma
x_matrix <- matrix(x, byrow = TRUE, nrow = nrow, ncol = ncol)
mu_matrix <- matrix(mu, nrow = nrow, ncol = ncol)
sigma <- sqrt(
diag((x_matrix - mu_matrix) %*% (gamma_each_component * t(x_matrix - mu_matrix))) / sum_gamma
)
LL_history <- append(LL_history, # recoding history...
LL_mixt_normal(x = x, mu = mu, sigma = sigma, ratio = ratio))
params_history <- bind_rows(
params_history,
tibble(iter = iter,
component = as.character(1:n_components),
mu = mu,
sigma = sigma,
ratio = ratio)
)
# coveragement
if (abs(LL_history[iter+1]-LL_history[iter]) < tol){
break
}
}
# 3. output
n_iter <- iter
LL_history_df <- tibble(iter = 0:n_iter, log_likelihood = LL_history)
estimated_component <- str_c("component", max.col(gamma_each_component))
data_estimated_component <- tibble(x = x,
estimated_component = estimated_component)
LL_last <- LL_history_df %>% filter(iter == n_iter) %>% .$log_likelihood
AIC <- -2 * LL_last + 2 * (3 * n_components - 1)
BIC <- -2 * LL_last + (3 * n_components - 1) * log(sample_size)
params_last <- params_history %>% filter(iter == n_iter) %>% select(-iter)
obj <- list(params = params_last,
log_likelihood = LL_last,
AIC = AIC,
BIC = BIC,
estimated_component = data_estimated_component,
n_iter = n_iter,
params_history = params_history,
log_likelihood_history = LL_history_df)
class(obj) <- "EM_MixtNormal" # define class of the result object...
return(obj)
}
#' print result of EM_Mixt_Normal
#'
#' @param x <EM_MixtNormal> the result object of the function : em_mixt_normal
#' @param ... additional arguments
#' @export
#'
print.EM_MixtNormal <- function(x, ...){
# some calculations
n_components <- length(x$params$component)
component_names <- str_c("component_", 1:n_components, ":")
# print some results...
cat("* Parameters of Components:\n")
print(as.data.frame(x$params %>% select(-component)), row.names = component_names)
cat("attributes : $params, $log_likelihood, $AIC, $BIC, $estimated_component, $n_iter")
}
#' summary result of EM_Mixt_Normal
#'
#' summary.EM_MixtNormal used to procedure result summaries of the result of the function : EM_mixt_normal.
#' @param object <EM_MixtNormal> the result object of the function :
#' @param ... additional arguments affecting the summary produced.
#'
#' @importFrom dplyr group_by
#' @importFrom dplyr summarize
#' @importFrom dplyr select
#' @importFrom dplyr n
#' @export
#'
summary.EM_MixtNormal <- function(object, ...){
# some calculations
n_components <- length(object$params$component)
component_names <- str_c("component_", 1:n_components, ":")
num_components <- object$estimated_component %>%
group_by(estimated_component) %>%
summarize(numbers = n()) %>%
.$numbers
metrics <- c(object$log_likelihood, object$AIC, object$BIC, object$n_iter)
names(metrics) <- c("log_likelihood", "AIC", "BIC", "iterations")
# print some results...
cat("* Numbers of Components:\n")
print(data.frame(num_components = num_components, row.names = component_names))
cat("\n")
cat("* Parameters of Components:\n")
print(as.data.frame(object$params %>% select(-component)), row.names = component_names)
cat("\n")
cat("")
print(metrics)
}
#' Plot of history of loglikelihood
#'
#' @param x <EM_MixtNormal> the result object of the function :
#'
#' @importFrom ggplot2 ggplot
#' @importFrom ggplot2 aes
#' @importFrom ggplot2 geom_line
#' @importFrom ggplot2 ggtitle
#' @export
#'
plot_LL <- function(x){
plt <- ggplot(data = x$log_likelihood_history, # 更新結果の可視化
mapping = aes(x = iter, y = log_likelihood)) +
geom_line() +
ggtitle("History of log likelihood")
return(plt)
}
#' Plot of histogram of components
#'
#' @param x <EM_MixtNormal> the result object of the function : EM_mixt_normal
#' @param binwidth <int_scalar> binwidth of histogram
#'
#' @importFrom ggplot2 ggplot
#' @importFrom ggplot2 aes
#' @importFrom ggplot2 geom_histogram
#' @importFrom ggplot2 ggtitle
#' @export
#'
plot_components <- function(x, binwidth){
plt <- ggplot(data = x$estimated_component,
mapping = aes(x = x, fill = estimated_component)) +
geom_histogram(binwidth = binwidth, alpha = 0.3) +
ggtitle("Result of component estimation : EM-algorithm")
return(plt)
}
#' predict component that each datapoint belongs to
#'
#' @param object <EM_MixtNormal> result of EM_mixt_normal
#' @param x <double_scalar> 1-dim. data points
#' @param ... additional arguments
#'
#' @importFrom purrr pmap
#' @importFrom stats dnorm
#' @export
#'
predict.EM_MixtNormal <- function(object, x, ...){
n_components <- length(object$params$component)
params_list <- list(mu = object$params$mu,
sigma = object$params$sigma,
ratio = object$params$ratio)
gamma <- params_list %>% pmap(.f = function(mu, sigma, ratio){
ratio * dnorm(x, mu, sigma)
})
obj <- max.col(matrix(unlist(gamma), ncol = n_components))
return(obj)
}
#' plotting history of EM-algoritm with gif file
#'
#' @param result <EM_MixtNormal>
#' @param width <double_scalar> image size
#' @param height <double_scalar> image size
#' @y_max <double_scalar> y_max
#' @param file_name <string> gif file name
#'
#' @importFrom stringr str_c
#' @importFrom gifski gifski
#' @importFrom ggplot2 ggplot
#' @importFrom ggplot2 geom_histogram
#' @importFrom ggplot2 stat_function
#' @importFrom ggplot2 ggtitle
#' @importFrom ggplot2 ggsave
#' @importFrom ggplot2 ylim
#' @export
#'
plot_history <- function(result, width = 5.00, height = 5.00, file_name = "history_of_EM"){
n_iter <- result$n_iter
n_components <- length(result$params$component)
x_min <- min(result$estimated_component$x)
x_max <- max(result$estimated_component$x)
y_max <- 5 * 1 / (x_max - x_min)
png_path <- str_c(tempdir(), "/plt", 0:n_iter, ".png")
for(i in 1:(n_iter + 1)){
params_each_iter <- result$params_history %>% filter(iter == (i - 1))
mu <- params_each_iter$mu
sigma <- params_each_iter$sigma
ratio <- params_each_iter$ratio
plt <- ggplot(data = result$estimated_component,
mapping = aes(x = x)) +
geom_histogram(aes(y=..density..), binwidth = 2.0, alpha = 0.5) +
stat_function(fun = function(x,mu,sigma,ratio){density_mixt_normal(x,mu,sigma,ratio)$prob_density},
args = list(mu = mu, sigma = sigma, ratio = ratio),
colour = "blue") +
ggtitle(paste0("History of EM-algorithm, iteration : ", (i - 1), "/", n_iter, ".")) +
ylim(c(0, y_max))
for(j in 1:n_components){
plt <- plt + stat_function(fun = function(x, mu, sigma, ratio){ratio*dnorm(x,mu,sigma)},
args = list(mu = mu[j], sigma = sigma[j], ratio = ratio[j]),
colour = "blue",
linetype = "dashed")
}
ggsave(png_path[i], width = width, height = height)
}
gif_path <- file.path(getwd(), str_c(file_name, ".gif"))
gifski(png_path, gif_path, width = width * 100, height = height * 100)
unlink(png_path)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.