Nothing
# The EMbC Package for R
#
# Copyright 2013, 2014, 2015 Joan Garriga <jgarriga@ceab.csic.es>, Aitana Oltra <aoltra@ceab.csic.es>, John R.B. Palmer <johnrbpalmer@gmail.com>, Frederic Bartumeus <fbartu@ceab.csic.es>
#
# EMbC is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
#
# EMbC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses.
# bivariate EM behavioral Clustering Algorithm
# --------------------------------------------
setGeneric("clst",function(bC,maxItr=200,info=0,check=0,...){standardGeneric("clst")})
setMethod("clst",signature("binClst"),function(bC,maxItr,info,...){
bC@m <- ncol(bC@X)
bC@k <- 2**bC@m
bC@n <- nrow(bC@X)
bC@C <- getColors(bC@k)
minVar <- bC@stdv**2
lblDst <- numeric()
# delimiters
Rdel <- lapply(1:bC@k,function(k){
lapply(1:bC@m,function(m){
bk <- as.integer(rev(intToBits(k-1)[1:bC@m]))
if (bk[m]==0) {
kL <- strtoi(paste(bk,collapse=''),base=2)+1
bk[m] <- 1
kH <- strtoi(paste(bk,collapse=''),base=2)+1
list(m,kL,kH)}
})
})
Rdel <- t(matrix(unlist(Rdel),c(3,bC@k)))
bC@R <- getRmatrix_cpp(bC,info)
rownames(bC@R) <- getkLbls(bC,kNmbrs=TRUE)
colnames(bC@R) <- c(sapply(seq(bC@m),function(m) paste('X',m,'.min',sep='')),sapply(seq(bC@m),function(m) paste('X',m,'.max',sep='')))
# initial assignements
bC@W <- matrix(rep(1/bC@k,bC@n*bC@k),c(bC@n,bC@k))
if (maxItr==0) bC@A <- getClusters_cpp(bC@X, bC@U, bC@W, bC@R)
else bC@A <- floor(runif(bC@n,min=1,max=bC@k+1))
# parameters
bC@P <- lapply(seq(bC@k),function(k) list(M=rep(0,bC@m),S=matrix(rep(0,bC@m+bC@m**2),c(bC@m,bC@m+1))))
bC@P <- getTheta(bC,minVar)
# show initial status
stepInfo(bC,-0,length(bC@A),0,info)
# optimization loop
A_ <- bC@A
R_ <- bC@R
stableItr <- 0
if (maxItr==0) return(bC)
for (itrNmb in 1:maxItr){
# likelihood densities
L <- array(-Inf,c(bC@n,bC@k))
for (k in seq(bC@k)){
if (!is.null(bC@P[[k]]$S) && mean(bC@W[,k])>0){
Xb <- bound2R_cpp(bC@X, bC@R[k,])
if (length(which(Xb==TRUE))>1){
UW <- bC@U*matrix(rep(bC@W[,k],bC@m),c(bC@n,bC@m))
bC@P[[k]] <- gssTheta(bC,Xb,UW,k,minVar)
if (!is.null(bC@P[[k]]$S))
L[,k] <- log(mean(bC@W[,k])) + dmnorm(bC@X,bC@P[[k]]$M,bC@P[[k]]$S,log=TRUE)
}
}
}
# densities2weights
bC@W <- dens2wght_cpp(L)
# new clustering assignments
bC@A <- getClusters_cpp(bC@X, bC@U, bC@W, bC@R)
# likelihood
bC@L[itrNmb] <- getLkh_cpp(L)
# show current status
if (info>=0) stepInfo(bC,bC@L[length(bC@L)],length(which(bC@A!=A_)),itrNmb,info)
# check Cycles and stop criterium
if (itrNmb>1){
lblDst[itrNmb] <- sqrt(sum((bC@A-A_)^2))
if (lblDst[itrNmb]==0 && all(bC@R==R_)){
stableItr <- stableItr +1
if (stableItr==2){
print('... Stable clustering',quote=FALSE)
break
}
}
else stableItr <- 0
}
if (itrNmb>100){
if (chckCycl(lblDst)){
print('... break: optimization cycle',quote=FALSE)
break
}
}
A_ <- bC@A
R_ <- bC@R
# M-step: New Reference Values
for (i in 1:dim(Rdel)[1]){
m <- Rdel[i,1]
kL <- Rdel[i,2]
kH <- Rdel[i,3]
if (is.na(bC@P[[kL]]$M[m])) r <- min(bC@X[,m])
else if (is.na(bC@P[[kH]]$M[m]) || (bC@P[[kL]]$M[m]==bC@P[[kH]]$M[m])) r <- max(bC@X[,m])
else{
r <- newDel(bC,m,kL,kH)
if (!is.null(r)){
bC@R[kL,bC@m+m] <- r
bC@R[kH,m] <- r
}
}
}
}
return(bC)
})
# EMbC internal functions
# -----------------------
# Initial Reference Values (C++)
getRmatrix_cpp <- function(bC,info){
if (info>=0) cat('... computing starting delimiters \n')
R <- matrix(rep(0, bC@k*bC@m*2), c(bC@k,bC@m*2))
remainingSplits <- list(list(vars=1:bC@m, data=bC@X, clst=rep(-1,bC@m)))
while (length(remainingSplits)>0){
remainingVars <- remainingSplits[[1]]$vars
splittingData <- remainingSplits[[1]]$data
hlf <- ceiling(dim(splittingData)[1]/2)
dLst <- lapply(remainingVars,function(m){
sPrc <- splitPrc_cpp(splittingData[,m])
return(list(m=m, splitVal=sPrc$sVal, kPrc=sPrc$kPrc))
})
splitIdx <- which.max(lapply(dLst,function(d) d$kPrc))
splitVar <- dLst[[splitIdx]]$m
splitVal <- dLst[[splitIdx]]$splitVal
clusterKey <- remainingSplits[[1]]$clst
patternKey <- which(clusterKey!=-1)
for (k in 1:bC@k){
bk <- as.integer(rev(intToBits(k-1)[1:bC@m]))
if (all(bk[patternKey]==clusterKey[patternKey])){
if (bk[splitVar]==0){
R[k,splitVar] <- min(bC@X[,splitVar])
R[k,(splitVar+bC@m)] <- splitVal
if (info>1){
del <- getkLbls(bC)[[k]]
substr(del,splitVar,splitVar) <- 'l'
cat('... delimiter ',del,': ',splitVal,'\n')
}
}
else{
R[k,splitVar] <- splitVal
R[k,(splitVar+bC@m)] <- max(bC@X[,splitVar])
if (info>1){
del <- getkLbls(bC)[[k]]
substr(del,splitVar,splitVar) <- 'h'
cat('... delimiter ',del,': ',splitVal,'\n')
}
}
}
}
remainingVars <- remainingVars[remainingVars!=splitVar]
if (length(remainingVars)>0){
lowData <- which(splittingData[,splitVar]<=splitVal)
if (length(lowData)>1){
lowClst <- clusterKey
lowClst[splitVar] <- 0
remainingSplits[[length(remainingSplits)+1]] <- list(vars=remainingVars, data=splittingData[lowData,], clst=lowClst)
}
hghData <- which(splittingData[,splitVar]>=splitVal)
if (length(hghData)>1){
hghClst <- clusterKey
hghClst[splitVar] <- 1
remainingSplits[[length(remainingSplits)+1]] <- list(vars=remainingVars, data=splittingData[hghData,], clst=hghClst)
}
}
remainingSplits[[1]] <- NULL
}
return(R)
}
# (Rbounded) gaussian parameters
gssTheta <- function(bC,Xb,UW,k,minVar,lower=-Inf,upper=Inf) {
Xk <- bC@X[which(Xb==TRUE),]
UWk <- UW[which(Xb==TRUE),]
Mk <- as.numeric(lapply(1:bC@m, function(m){ sum(UWk[,m]*Xk[,m])/sum(UWk[,m])}))
Sk <- matrix(rep(0,bC@m**2), c(bC@m,bC@m))
for (mi in 1:bC@m){
if (!is.na(Mk[mi])){
Sk[mi,mi:bC@m] <- sapply(mi:bC@m,function(mj){
UWij <- sqrt(UW[,mi]**2+UW[,mj]**2)/sqrt(2)
ifelse ((sum(UWij)<=0), -Inf, sum(UWij*(bC@X[,mi]-Mk[mi])*(bC@X[,mj]-Mk[mj]))/sum(UWij))
})
}
}
for (mi in 1:bC@m){
# if (Sk[mi,mi]>0 && Sk[mi,mi]<minVar[mi]){
if (abs(Sk[mi,mi])<minVar[mi]){
Sk[mi,] <- rep(0, bC@m)
Sk[mi,mi] <- minVar[mi]
}
Sk[,mi] <- Sk[mi,]
}
if (is.null(pd.solve(Sk, silent=TRUE))){
cat('... singular covariance matrix for', sprintf("cluster=%d",k), '\n')
Sk <- NULL
}
return(list(M=Mk, S=Sk))
}
# delimiters
newDel <- function(bC,m,kL,kH){
subSet <- which(bC@X[,m]>=bC@P[[kL]]$M[m] & bC@X[,m]<=bC@P[[kH]]$M[m])
if (mean(bC@W[,kL])==0 || is.null(bC@P[[kL]]$S) || length(subSet)<2)
if (length(which(bC@A==kL))==0)
r <- min(bC@X[,m])
else
r <- bC@P[[kL]]$M[m]
else if (mean(bC@W[,kH])==0 || is.null(bC@P[[kH]]$S) || length(subSet)<2)
if (length(which(bC@A==kH))==0)
r <- max(bC@X[,m])
else
r <- bC@P[[kH]]$M[m]
else {
X <- bC@X[subSet,]
for (j in seq(bC@m))
if (j!=m) X[,j] <- bC@P[[kL]]$M[j] + (X[,m]-bC@P[[kL]]$M[m])*(bC@P[[kH]]$M[j]-bC@P[[kL]]$M[j])/(bC@P[[kH]]$M[m]-bC@P[[kL]]$M[m])
# cluster lognormal densities
dL <- log(mean(bC@W[,kL])) + dmnorm(X,bC@P[[kL]]$M,bC@P[[kL]]$S,log=TRUE)
dH <- log(mean(bC@W[,kH])) + dmnorm(X,bC@P[[kH]]$M,bC@P[[kH]]$S,log=TRUE)
# densities to weights
W <- dens2wght_cpp(cbind(dL,dH))
# find equiprobability point
if (all(W[,1]==W[,2]))
return(NULL)
else if (all(W[,1]<W[,2]))
if (length(which(bC@A==kL))==0)
r <- min(bC@X[,m])
else
r <- bC@P[[kL]]$M[m]
else if (all(W[,1]>W[,2]))
if (length(which(bC@A==kH))==0)
r <- max(bC@X[,m])
else
r <- bC@P[[kH]]$M[m]
else{
r <- X[which.min(abs(W[,1]-W[,2])),m]
r <- max(r,bC@P[[kL]]$M[m])
r <- min(r,bC@P[[kH]]$M[m])
}
return(r)
}
}
# show current status
stepInfo <- function(bC,lkh,lblDff,itrNmb,info) {
it <- formatC(itrNmb,width=3)
lk <- formatC(lkh,width=10,format='e',flag="-")
df <- formatC(lblDff,width=8)
nK <- formatC(sum(as.numeric(lapply(1:bC@k,function(k) as.integer(any(bC@A==k))))),width=6)
A <- as.numeric(lapply(1:bC@k,function(k){formatC(round(length(bC@A[bC@A==k])/bC@n,4),width=6,flag="-")}))
print(paste(it,lk,nK,df,sep=" "),quote=FALSE)
if (info>0) stts(bC)
if (info>1) print(bC@R)
}
# check cycles
chckCycl <- function(lblDst){
for (i in 1:10){
cue <- seq((length(lblDst)-i),length(lblDst))
cueMean <- mean(lblDst[cue])
cueSmDv <- sum(lblDst[cue]-cueMean)
if (all(unlist(lapply(1:5,function(j){(sum(lblDst[(cue-((j*i)+1))]-cueMean)==cueSmDv)}))))
return(TRUE)
}
return(FALSE)
}
# compute parameters
# (also intended for external methods, i.e. "rlbl", allthough not used at the moment)
getTheta <- function(bC,minVar){
for (k in seq(bC@k)){
if (mean(bC@W[,k])>0){
# Xb <- bound2R(bC,k)
Xb <- bound2R_cpp(bC@X, bC@R[k,])
if (length(which(Xb==TRUE))>1){
UW <- bC@U*matrix(rep(bC@W[,k],bC@m),c(bC@n,bC@m))
bC@P[[k]] <- gssTheta(bC,Xb,UW,k,minVar)
}
}
}
return(bC@P)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.