EMGauss = function(Data, K=NULL, Means=NULL,SDs=NULL,Weights=NULL,MaxNumberofIterations=10, fast=F){
#em=EMGauss(Data,K,Means,SDs,Weights)
# EM- Algorithm to calculate optimal Gaussian Mixture Model for given data in one Dimension
# In particular, no adding or removing of Gaussian kernels
# NOTE: this is probably not numerical stable, log(gauss) is not used.
#
# DESCRIPTION
#
# INPUT
# Data(1:AnzCases) the data points as a vector
# K estimated number of Gaussian kernels
# Means(1:L) estimated Gaussian Kernels = means(clustercenters),
# L == Number of Clustercenters; default=meanrobust(Data)
# SDs(1:L) estimated Gaussian Kernels = standard deviations; default=stdrobust(Data)
# Weights(1:L) relative number of points in Gaussians (prior probabilities): sum(Weights) ==1; default=1
# MaxNumberofIterations Number of Iterations; default=10
# fast Default: FALSE: Using mclust EM, TRUE: Own faster but maybe numerical unstable implementation
# OUTPUT
# Means means of GMM generated by EM algorithm
# SDs standard deviations of GMM generated by EM algorithm
# Weights prior probabilities of Gaussians
#
# Author Hansen-Goos 2014
# 1.Editor: MT 08/2015 bugfixes, new EM
# 2.Editor: FL 10/2016 Parameter k hinzugefuegt
#MT: Funktionen nach vorne verschoben
# meanrobust
#################################################
`meanrobust` <- function(x, p=0.1){
if(is.matrix(x)){
mhat<-c()
for(i in 1:dim(x)[2]){
mhat[i]<-mean(x[,i],trim=p,na.rm=TRUE)
}
} else mhat<-mean(x,trim=p,na.rm=TRUE)
return (mhat)
}
#################################################
# stdrobust
#################################################
`stdrobust` <- function(x,lowInnerPercentile=25){
#MT: nach vorne verschoben, damit es keinen Bug gibt
prctile<-function(x,p){
# matlab:
# Y = prctile(X,p) returns percentiles of the values in X.
# p is a scalar or a vector of percent values. When X is a
# vector, Y is the same size as p and Y(i) contains the p(i)th
# percentile. When X is a matrix, the ith row of Y contains the
# p(i)th percentiles of each column of X. For N-dimensional arrays,
# prctile operates along the first nonsingleton dimension of X.
if(length(p)==1){
if(p>1){p=p/100}
}
if(is.matrix(x) && ncol(x)>1){
cols<-ncol(x)
quants<-matrix(0,nrow=length(p),ncol=cols)
for(i in 1:cols){
quants[,i]<-quantile(x[,i],probs=p,type=5,na.rm=TRUE)
}
}else{
quants<-quantile(x,p,type=5,na.rm=TRUE)
}
return(quants)
}
if(is.vector(x) || (is.matrix(x) && dim(x)[1]==1)) dim(x)<-c(length(x),1)
lowInnerPercentile<-max(1,min(lowInnerPercentile,49))
hiInnerPercentile<- 100 - lowInnerPercentile
faktor<-sum(abs(qnorm(t(c(lowInnerPercentile,hiInnerPercentile)/100),0,1)))
std<-sd(x,na.rm=TRUE)
p<-c(lowInnerPercentile,hiInnerPercentile)/100
quartile<-prctile(x,p)
if (ncol(x)>1)
iqr<-quartile[2,]-quartile[1,]
else
iqr<-quartile[2]-quartile[1]
shat<-c()
for(i in 1:ncol(x)){
shat[i]<-min(std[i],iqr[i]/faktor,na.rm=TRUE)
}
dim(shat)<-c(1,ncol(x))
colnames(shat)<-colnames(x)
return (shat)
}
#################################################
# normpdf
#################################################
`normpdf` <- function(X,Mean,Sdev){
# computes the normal pdf at each of the values in X
# R funktion fuer PDF = normpdf(X,Mean,Sdev)
return(dnorm(X,Mean,Sdev))
} # end function normpdf
#################################################
#MT/FL: Fehlerabfang
if(!is.null(K)){
if(!is.null(Means)){
if(K != length(Means)) stop("K should be equal to length(Means)")
}else{
if(K == 1) Means = meanrobust(Data)
else Means = sample(Data, K)
}
if(!is.null(SDs)){
if(K != length(SDs)) stop("k should be equal to length(SDs)")
}else{
if(K == 1) SDs = stdrobust(Data)
else SDs = runif(K,0,sd(Data))
}
if(is.null(Weights)) Weights = rep(1/K, K)
}else{
if(!is.null(Means)) K = length(Means)
else if(!is.null(SDs)) K = length(SDs)
else if(!is.null(Weights)) K = length(Weights)
else K = 1
if(is.null(Means)) Means=meanrobust(Data)
if(is.null(SDs)) SDs=stdrobust(Data)
if(is.null(Weights)) Weights=rep(1/K,K)
}
# how many Clustercenters
L <- length(Means)
Ls <- length(SDs)
La <- length(Weights)
if (!((L == Ls ) && (L== La))){
warning('EMgauss: Number of means not equal to number of sdevs or Weights')
}
if(fast){
if (!(abs(sum(Weights)-1) <0.001)){
sumWeight <- sum(Weights)
for (i in 1:length(Weights)){
Weights[i] <- Weights[i]/sumWeight
}
}
AnzCases <- length(Data)
EMmean <- Means
EMsdev <- SDs
EMweights <- Weights
for (t in 1:MaxNumberofIterations){ # update EM Parameters
#1.) compute actual alpha weighted probability for each point
Na <- matrix(0,AnzCases,L)
for (i in 1:L) {# compute actual Distributions
Na[1:AnzCases,i] <- normpdf(Data,EMmean[i],EMsdev[i])* EMweights[i]
} # for (i in 1:L) # compute actual Distributions
#2.) compute total probability for each point
TotalEMGauss <- seq(0,0,length.out=AnzCases)
for (j in 1:AnzCases){
TotalEMGauss[j] <- sum(Na[j,1:L])
}
#3.) compute point's weight as 1.) divided by 2.)
w <- matrix(0,AnzCases,L)
for (j in 1:L){
w[1:AnzCases,j] <- Na[1:AnzCases,j]/TotalEMGauss
}
#4.) update Weightstrich AND weights
Weightstrich <- seq(0,0,length.out=L)
for (j in 1:L){
Weightstrich[j] <- sum(w[1:AnzCases,j]) # = alpha * AnzCases
if (is.nan(Weightstrich[j]) || is.na(Weightstrich[j])){
Weightstrich[j] <- 0
}
}
for (j in 1:L){
if (Weightstrich[j]<0.01){
Weightstrich[j] <- 0.009
}
}
EMweights <- Weightstrich/AnzCases
ZeroInd <- seq(0,0,length.out=L)
for (j in 1:L){
if (EMweights[j]<0.01){
ZeroInd[j] <- 1
EMweights[j] <- 0
}
}
if (sum(EMweights) <0.01){ # no right solution found: give up search & return robust values
EMweights <- seq(0,0,length.out=L)
EMmean <- EMweights
EMsdev <- EMweights
EMweights[1] <- 1
EMmean[1] <- meanrobust(Data)
EMsdev[1] <- stdrobust(Data)
# output <- list(mean=EMmean,sdev=EMsdev,weight=EMweights)
output <- list(Means=EMmean,SDs=EMsdev,Weights=EMweights) # edited: OHG 05.2016
warning("EM: no right solution found --> give up search & return robust values")
return(output)
break
}
#5.) update means
for (j in 1:L){
EMmean[j] <- sum(w[1:AnzCases,j]*Data)/Weightstrich[j]
}
# account for weight ==0
for (j in 1:L){
if (ZeroInd[j]){
EMmean[j] <- 0
}
}
#6. update std devs
for (j in 1:L){
EMsdev[j] <- sqrt(sum(w[1:AnzCases,j]*(Data-seq(1,1,length.out=AnzCases)*EMmean[j])^2)/Weightstrich[j])
}
# account for weight ==0
for (j in 1:L){
if (ZeroInd[j]){
EMsdev[j] <- 0
}
}
}# for t=1:MaxNumberofIterations # update EM Parameters
#output <- list(mean=EMmean,sdev=EMsdev,weight=EMweights)
output <- list(Means=EMmean,SDs=EMsdev,Weights=EMweights)
}else{ #fast=f
#requireRpackage('mclust')
#iterations???
requireNamespace('mclust')
em=mclust::densityMclust(Data,G=L, modelName='V', parameters=list(pro=Weights,mean=Means,variance=sqrt(SDs),control=list(itmax=c(MaxNumberofIterations,MaxNumberofIterations))))
res=em$parameters
# sigmasq entspricht hier sigma^2 = sd^2 => fuer SD nochmal Wurzel ziehen
output <- list(Means=unname(res$mean),SDs=sqrt(unname(res$variance$sigmasq)),Weights=unname(res$pro))
#output <- list(mean=unname(res$mean),sdev=unname(res$variance$sigmasq),weight=unname(res$pro))
return(output)
} # end if fast
} # eof
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.