Nothing
## ----preliminary, echo=FALSE, results="hide", message=FALSE, warning=FALSE, fig.keep="none"----
options(width=60, prompt="R> ", continue=" ", useFancyQuotes=FALSE, digits=3)
library(WeightedCluster)
library(TraMineR)
library(vegan)
library(knitr)
library(isotone)
hook_setwidth <- local({
default.width <- 0
function(before, options, envir){
if(before) {
default.width <<- getOption("width")
options(width = options$consolew)
} else{
options(width = default.width)
}
return(NULL)
}
})
knit_hooks$set(consolew =hook_setwidth)
##knit_hooks$set(crop = hook_pdfcrop)
knit_hooks$set(small.mar = function(before, options, envir) {
if (before) par(mar=c(2.1, 4.1, 4.1, 1.1)) # smaller margin on top and right
})
opts_knit$set(concordance=TRUE)
opts_chunk$set(message=FALSE, prompt=TRUE, dev="pdf", echo=TRUE, comment=NA, small.mar=TRUE, fig.align="center", fig.path="graphics/WC-", out.width=".8\\linewidth")
## knit_hooks$set(error = function(x, options) stop(x))
## knit_hooks$set(warning = function(x, options) stop("Warnings: ", x))
## ----WCload, eval=FALSE, fig.keep="none"------------------
# install.packages("WeightedCluster")
# library(WeightedCluster)
## ----distcompute, tidy=FALSE, fig.keep="none"-------------
data(mvad)
mvad.alphabet <- c("employment", "FE", "HE", "joblessness", "school",
"training")
mvad.labels <- c("Employment", "Further Education", "Higher Education",
"Joblessness", "School", "Training")
mvad.scodes <- c("EM", "FE", "HE", "JL", "SC", "TR")
mvadseq <- seqdef(mvad[, 17:86], alphabet = mvad.alphabet,
states = mvad.scodes, labels = mvad.labels,
weights = mvad$weight, xtstep = 6)
## Defining the custom cost matrix
subm.custom <- matrix(
c(0, 1, 1, 2, 1, 1,
1, 0, 1, 2, 1, 2,
1, 1, 0, 3, 1, 2,
2, 2, 3, 0, 3, 1,
1, 1, 1, 3, 0, 2,
1, 2, 2, 1, 2, 0),
nrow = 6, ncol = 6, byrow = TRUE)
## Computing the OM dissimilarities
mvaddist <- seqdist(mvadseq, method = "OM", indel = 1.5, sm = subm.custom)
## ----hclustcompute, consolew=50, fig.keep="none"----------
wardCluster <- hclust(as.dist(mvaddist), method="ward", members=mvad$weight)
## ----as.seqtreecompute, fig.keep="none"-------------------
wardTree <- as.seqtree(wardCluster, seqdata=mvadseq, diss=mvaddist, ncluster=6)
## ----seqtreedisplay, echo=FALSE, results="hide", eval=FALSE, fig.keep="none"----
# seqtreedisplay(wardTree, type="d", border=NA, filename="wardtree.png", showdepth=TRUE, showtree=FALSE)
## ----seqtreedisplay-fake, eval=FALSE, echo=TRUE, results="hide", fig.keep="none"----
# seqtreedisplay(wardTree, type="d", border=NA, showdepth=TRUE)
## ----cutreecompute, fig.keep="none"-----------------------
clust4 <- cutree(wardCluster, k=4)
## ----seqdplot-4clust, fig.width=7, fig.height=5-----------
seqdplot(mvadseq, group=clust4, border=NA)
## ----wcKMedoids-compute, fig.keep="none"------------------
pamclust4 <- wcKMedoids(mvaddist, k=4, weights=mvad$weight)
## ----pamclust4-plot, fig.width=7, fig.height=5------------
seqdplot(mvadseq, group=pamclust4$clustering, border=NA)
## ----wcKMedoids-print, fig.keep="none"--------------------
print(mvadseq[unique(pamclust4$clustering), ], format="SPS")
## ----wcKMedoids-compute4-ward, fig.keep="none"------------
pamwardclust4 <- wcKMedoids(mvaddist, k=4, weights=mvad$weight, initialclust=wardCluster)
## ----pamwardclust4-plot-ward, fig.width=7, fig.height=5----
seqdplot(mvadseq, group=pamwardclust4$clustering, border=NA)
## ----wcClusterQuality-compute, echo=2:3, consolew=70, fig.keep="none"----
options(digits=2)
clustqual4 <- wcClusterQuality(mvaddist, clust4, weights=mvad$weight)
clustqual4$stats
options(digits=3)
## ----wcClusterQuality-computeasw, fig.keep="none"---------
clustqual4$ASW
## ----silhouette-indexplot, echo=2:3, dev="png", dpi=600, fig.width=8.75, fig.height=5----
par(mar=c(2.1, 4.1, 4.1, 1.1))
sil <- wcSilhouetteObs(mvaddist, clust4, weights=mvad$weight, measure="ASWw")
seqIplot(mvadseq, group=clust4, sortv=sil)
## ----wcClusterQuality-compute9-clustqual4pam, echo=2, fig.keep="none"----
options(digits=1)
pamclust4$stats
options(digits=3)
## ----wcRange-compute, consolew=50, fig.keep="none"--------
wardRange <- as.clustrange(wardCluster, diss=mvaddist, weights=mvad$weight, ncluster=20)
summary(wardRange, max.rank=2)
## ----wcRange-plot, fig.width=8, fig.height=3.5------------
plot(wardRange, stat=c("ASWw", "HG", "PBC", "HC"))
## ----wcRange-plot-zscore, fig.width=8, fig.height=3.5-----
plot(wardRange, stat=c("ASWw", "HG", "PBC", "HC"), norm="zscore")
## ----wardClust6-plot, fig.width=7, fig.height=6-----------
seqdplot(mvadseq, group=wardRange$clustering$cluster6, border=NA)
## ----wcKMedRange-compute, echo=TRUE, fig.keep="none"------
pamRange <- wcKMedRange(mvaddist, kvals=2:20, weights=mvad$weight)
summary(pamRange, max.rank=2)
## ----cluster-naming, echo=TRUE, fig.keep="none"-----------
mvad$pam4 <- factor(pamclust4$clustering, levels=c(66, 467, 607, 641), labels=c("Train-Empl", "School-Empl", "High Ed", "Unempl"))
## ----auto-cluster-naming, echo=TRUE, fig.keep="none"------
mvad$pam4.auto <- seqclustname(mvadseq, pamclust4$clustering, mvaddist)
table( mvad$pam4.auto, mvad$pam4)
## ----mds-compute, echo=FALSE, fig.keep="none"-------------
worsq <- wcmdscale(mvaddist, w=mvad$weight, k=2)
mvad$test <- rep(-1, nrow(mvad))
for(clust in unique(pamclust4$clustering)){
cond <- pamclust4$clustering == clust
values <- worsq[cond, 2]
mvad$test[cond] <- as.integer(values > weighted.median(values, w=mvad$weight[cond]))
}
mvad$test <- factor(mvad$test, levels=0:1, labels=c("non-test", "test"))
## ----testdplot, fig.width=10, fig.height=4.5--------------
seqdplot(mvadseq, group=mvad$test, border=NA)
## ----testiplot-chisq, fig.keep="none"---------------------
tb <- xtabs(weight~test+pam4, data=mvad)
chisq.test(tb)
## ----testdplot-EE, fig.width=10, fig.height=4.5-----------
SchoolEmpl <- mvad$pam4=="School-Empl"
seqdplot(mvadseq[SchoolEmpl, ], group=mvad$test[SchoolEmpl], border=NA)
## ----testiplot-dissassoc, fig.keep="none"-----------------
set.seed(1)
dsa <- dissassoc(mvaddist, mvad$test, weights=mvad$weight, weight.permutation="diss", R=5000)
print(dsa$stat)
## ----seqtree-link-covar, echo=FALSE, results="hide", eval=FALSE, fig.keep="none"----
# set.seed(1)
# tree <- seqtree(mvadseq~gcse5eq+Grammar+funemp, data=mvad, diss=mvaddist, weight.permutation="diss")
# seqtreedisplay(tree, type="d", border=NA, filename="seqtree.png", showtree=FALSE)
## ----seqtree-link-covar-fake, eval=FALSE, echo=TRUE, results="hide", fig.keep="none"----
# tree <- seqtree(mvadseq~gcse5eq+Grammar+funemp, data=mvad, diss=mvaddist, weight.permutation="diss")
# seqtreedisplay(tree, type="d", border=NA)
## ----wcAggregateCases, echo=TRUE, fig.keep="none"---------
ac <- wcAggregateCases(mvad[, 17:86], weights=mvad$weight)
ac
## ----wcAggregateCases-seqdef, echo=TRUE, fig.keep="none"----
uniqueSeq <- seqdef(mvad[ac$aggIndex, 17:86], alphabet = mvad.alphabet,
states = mvad.scodes, labels = mvad.labels, weights=ac$aggWeights)
## ----wcAggregateCases-wckmedoids, echo=TRUE, fig.keep="none"----
mvaddist2 <- seqdist(uniqueSeq, method="OM", indel=1.5, sm=subm.custom)
pamclust4ac <- wcKMedoids(mvaddist2, k=4, weights=ac$aggWeights)
## ----wcAggregateCases-wckmedoids-back, echo=TRUE, fig.keep="none"----
mvad$acpam4 <- pamclust4ac$clustering[ac$disaggIndex]
## ----wcAggregateCases-wckmedoids-table, echo=TRUE, fig.keep="none"----
table(mvad$pam4, mvad$acpam4)
## ----perf-loadsimul, echo=FALSE, results="hide", message=FALSE, fig.keep="none"----
load(file="randB.RData")
randB$nnn <- factor(randB$n, levels=c(200, 500, 1000, 2000), labels=c("n=200", "n=500", "n=1000", "n=2000"))
library(lattice)
## ----perf-nbyk-plot-time, fig.width=8, fig.height=3.5, echo=FALSE----
print(xyplot(ClusterTime~k|nnn, data=randB, groups=test, type="l", auto.key = list(points=F, lines=T,corner = c(0, 0.8), cex=.9, size=2), scales=list(y = list(log = TRUE)), ylab="Computing time", xlab="Number of groups"))
## ----perf-nbyk-plot, fig.width=8, fig.height=3.5, echo=FALSE----
print(xyplot(relative.tot~k|nnn, data=randB, groups=test, type="l", auto.key = list(points=F, lines=T,corner = c(0, 0.8), size=2, cex=.9), ylab="Computing time", xlab="Number of groups"))
## ----mdscomputeannexe, echo=TRUE, results="hide", fig.keep="none"----
library(vegan)
worsq <- wcmdscale(mvaddist, w=mvad$weight, k=2)
## ----mdscomputeannexesecond, echo=TRUE, results="hide", consolew=50, fig.keep="none"----
library(isotone)
mvad$test <- rep(-1, nrow(mvad))
for(clust in unique(pamclust4$clustering)){
cond <- pamclust4$clustering == clust
values <- worsq[cond, 2]
mvad$test[cond] <- values> weighted.median(values, w=mvad$weight[cond])
}
mvad$test <- factor(mvad$test, levels=0:1, labels=c("non-test", "test"))
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.