Nothing
knitr::opts_chunk$set( fig.width = 6, fig.height = 6, fig.align ='center' )
This file contains examples of the use of the weightedEM, unpack, and cwLocScat functions. It reproduces all the figures of the report "Analyzing cellwise weighted data" by P.J. Rousseeuw.
library("cellWise") n = 10 d = 3 A = matrix(0.7, d, d); diag(A) = 1 A set.seed(12345) library("MASS") X = mvrnorm(n, rep(0,d), A) colnames(X) = c("X1","X2","X3") X[1,3] = X[2,2] = X[3,1] = X[4,1] = X[6,2] = NA X # rows 1, 2, 3, 4, 6 have NAs w = c(1,2,1,1,2,2,1,1,1,1) # rowwise weights
out = weightedEM(X,w,crit=1e-12,computeloglik=T) out$niter # number of iteration steps taken out$mu round(out$Sigma,6) plot(1:out$niter, out$loglikhd[1:out$niter], type='l', lty=1, col=4, xlab='step', ylab='log(likelihood)', main='log(likelihood) of weighted EM iterations') tail(out$loglikhd) # would have NAs for computeloglik=F out$impX # imputed data, has no NA's
# The weights may be multiplied by a constant: # (w = c(1,2,1,1,2,2,1,1,1,1)/3) # divide weights by 3 out = weightedEM(X,w,crit=1e-12,computeloglik=T) out$niter # OK, same results: out$mu # same round(out$Sigma,6) # same tail(out$loglikhd) # converges to -11.3573 = -34.07189 / 3 # Create an equivalent matrix y without weights, by repeating # some rows according to their integer weights: # Y = X[c(1,2,2,3,4,5,5,6,6,7,8,9,10),] dim(Y) Y # This gives the same results: out = weightedEM(Y,crit=1e-12,computeloglik=T) # OK, same out$niter out$mu round(out$Sigma,6) tail(out$loglikhd) # converges to -34.07189 like before.
X = matrix(c(2.8,5.3,4.9,7.4, 2.3,5.7,4.3,7.2, 2.5,5.1,4.4,7.6),nrow=3,byrow=T) W = matrix(c(0.8,1.0,0.3,0.4, 0.3,0.5,0.9,0.5, 1.0,0.6,0,0.7),nrow=3,byrow=T) rownames(X) = rownames(W) = c("A","B","C") colnames(X) = colnames(W) = c("V1","V2","V3","V4") n = nrow(X); d = ncol(X) X W out = unpack(X,W) cbind(out$U,out$v) # OK dim(out$U)
set.seed(12345) n = 1000; d = 2 A = matrix(0.7, d, d); diag(A) = 1 A X = mvrnorm(n, rep(0,d), A) head(X) W = abs(mvrnorm(n, rep(0,d), diag(rep(1,2)))) W = W/max(as.vector(W)) W[2,1] = 0 W[5,2] = 0 head(W) fit = cwLocScat(X,W) fit$cwMLEiter # number of iteration steps fit$cwMLEmu fit$cwMean fit$cwMLEsigma fit$cwCov # similar to cwMLEsigma: fit$sqrtCov # same diagonal:
data("data_personality_traits") X <- data_personality_traits$X W <- data_personality_traits$W cbind(X,W) # as in table in the paper out = unpack(X,W) cbind(out$U,out$v) fit = cwLocScat(X,W) fit$cwMLEiter round(fit$cwMLEmu,2) round(fit$cwMean,2) round(fit$cwMLEsigma, 2) round(eigen(fit$cwMLEsigma)$values, 2) round(fit$cwCov, 2) round(eigen(fit$cwCov)$values,5) round(cov(X), 2) # unweighted round(eigen(cov(X))$values, 2)
Now we reproduce the figure in the paper
ellips = function(covmat, mu, quant=0.95, npoints = 120) { # computes points of the ellipse t(X-mu)%*%covmat%*%(X-mu) = c # with c = qchisq(quant,df=2) if (!all(dim(covmat) == c(2, 2))) stop("covmat is not 2 by 2") eig = eigen(covmat) U = eig$vectors R = U %*% diag(sqrt(eig$values)) %*% t(U) # square root of covmat angles = seq(0, 2*pi, length = npoints+1) xy = cbind(cos(angles),sin(angles)) # points on the unit circle fac = sqrt(qchisq(quant, df=2)) scale(fac*xy%*%R, center = -mu, scale=FALSE) } n = nrow(X) j = 3; k = 6 # to plot variables t3 and t6 xy = X[,c(j,k)] cov2cor(cov(X)[c(j,k),c(j,k)]) # unweighted correlation is 0.10 cov2cor(fit$cwMLEsigma[c(j,k),c(j,k)]) # now correlation is 0.31 (wxy = W[,c(j,k)]) duplicated(xy) # ties: row 4 equals row 3, and row 9 equals row 5 wxy[3,] = wxy[3,] + wxy[4,] # add cell weights of rows 3 and 4 wxy[5,] = wxy[5,] + wxy[9,] # add cell weights of rows 5 and 9 (wxy = wxy[-c(4,9),]) # remove duplicate rows (xy = xy[-c(4,9),]) # remove duplicate rows # pdf("personality_cwMLE_cwCov.pdf",width=5.5,height=5.5) myxlim = c(2,14); myylim = c(1,13) plot(xy, pch=16, col="white", xlim=myxlim, ylim=myylim, xlab="",ylab="") fac = 0.3 # for the size of the lines representing the cell weights for(i in seq_len(nrow(xy))){ WY = c(xy[i,1] - fac*wxy[i,1],xy[i,2]) # (WestX, Y) EY = c(xy[i,1] + fac*wxy[i,1],xy[i,2]) # (EastX, Y) XS = c(xy[i,1],xy[i,2] - fac*wxy[i,2]) # (X, SouthY) XN = c(xy[i,1],xy[i,2] + fac*wxy[i,2]) # (X, NorthY) lines(rbind(WY,EY),lwd=3) lines(rbind(XS,XN),lwd=3) } title(main="tolerance ellipses with and without cell weights", line=0.8,cex.main=1) # 1.2) title(xlab="trait 3",line=2.1,cex.lab=1.0) title(ylab="trait 6",line=2.1,cex.lab=1.0) center1 = colMeans(X[,c(j,k)]) covmat1 = (n-1)*cov(X[,c(j,k)])/n ell1 = ellips(covmat1, center1) lines(ell1,lwd=1.5,col="red") # ellipse from unweighted covariance fit2 = cwLocScat(xy,wxy) center2 = fit2$cwMLEmu covmat2 = fit2$cwMLEsigma ell2 = ellips(covmat2, center2) # ellipse from cwMLE estimates lines(ell2,lwd=1.5,col="blue") center3 = fit2$cwMean covmat3 = fit2$cwCov ell3 = ellips(covmat3, center3) # ellipse from cwMean and cwCov lines(ell3,lwd=1.5,lty=2,col="blue") legend("topleft",c("cwMLE","cwCov","MLE"), col=c("blue","blue","red"), lty=c(1,2,1), lwd=1.5, cex=1) # dev.off() # The blue ellipses, that use the cell weights, are a bit # higher and more slanted than the red ellipse that doesn't, # mainly due to the high cell weight of the y-coordinate of # the point in the top right corner.
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.