inst/doc/TraMineR-state-sequence.R

### R code from vignette source 'TraMineR-state-sequence.Rnw'

###################################################
### code chunk number 1: preliminary
###################################################
options(width=76, prompt="R> ", continue="+   ", encoding="UTF-8", useFancyQuotes=FALSE)
library(TraMineR)
library(xtable)

## place where figures generated by Sweave will be stored
dir.create("Graphics", showWarnings = FALSE)
graphdir <- "Graphics/"


###################################################
### code chunk number 2: install-load
###################################################
## install.packages("TraMineR", repos="http://mephisto.unige.ch/traminer/R")
library("TraMineR")
data("mvad")
mvad.alphab <- c("employment", "FE", "HE", "joblessness", "school", "training")
mvad.seq <- seqdef(mvad, 17:86, xtstep=6, alphabet=mvad.alphab)
##mvad.seq <- seqdef(mvad, 17:86, xtstep=6)


###################################################
### code chunk number 3: mvad-dist
###################################################
mvad.om <- seqdist(mvad.seq, method = "OM", indel = 1, sm = "TRATE")


###################################################
### code chunk number 4: mvad-clust-intro
###################################################
library("cluster")
clusterward <- agnes(mvad.om, diss=TRUE, method="ward")
mvad.cl4 <- cutree(clusterward, k=4)
cl4.lab <- factor(mvad.cl4, labels = paste("Cluster",1:4))


###################################################
### code chunk number 5: mvad-clust-seqdplot
###################################################
seqdplot(mvad.seq, group=cl4.lab, border=NA)
#seqdplot(mvad.seq, group=cluster5, border=NA)


###################################################
### code chunk number 6: fig_cluster-seqdplot
###################################################
seqdplot(mvad.seq, group=cl4.lab, border=NA)
#seqdplot(mvad.seq, group=cluster5, border=NA)


###################################################
### code chunk number 7: regression
###################################################
entropies <- seqient(mvad.seq)
lm.ent <- lm(entropies ~ male + funemp + gcse5eq, mvad)


###################################################
### code chunk number 8: regression-output
###################################################
##xt1 <- xtable(lm.ent, align="|crrr|", digits=1)
lm.xtable <- xtable(lm.ent, digits=2, align="|r|rrrr|")
rownames(lm.xtable)[2:4] <- c("Male", "Father unemployed", "Good end compulsory school grade")
colnames(lm.xtable)[2:4] <- c("Std error", "$t$ value", "Pr$(>|t|)$")
##print(lm.xtable, floating=FALSE, caption.placement="top", hline.after=c(-1,0,0,nrow(lm.xtable)),
print(lm.xtable, floating=FALSE, caption.placement="top",
  sanitize.text.function = function(x){x})


###################################################
### code chunk number 9: mvad-extract
###################################################
mvad[1:2, 17:22]


###################################################
### code chunk number 10: reducing_width
###################################################
## oopt <- options(width=60)


###################################################
### code chunk number 11: mvad-seq
###################################################
mvad.lab <- c("Employment", "Further education", "Higher education", "Joblessness", "School", "Training")
mvad.scode <- c("EM", "FE", "HE", "JL", "SC", "TR")
mvad.seq <- seqdef(mvad, 17:86, alphabet=mvad.alphab, states = mvad.scode,
     labels = mvad.lab, xtstep=6)


###################################################
### code chunk number 12: mvad-gcse5eq-levels
###################################################
levels(mvad$gcse5eq) <- c("<5 GCSEs", ">=5 GCSEs")


###################################################
### code chunk number 13: resetting_width
###################################################
## options(oopt)


###################################################
### code chunk number 14: mvad-seq-print
###################################################
print(mvad.seq[1:5, ], format="SPS")


###################################################
### code chunk number 15: seqdef-weighted
###################################################
mvad.seq <- seqdef(mvad, 17:86, alphabet=mvad.alphab, states = mvad.scode,
     labels = mvad.lab, weights=mvad$weight, xtstep=6)


###################################################
### code chunk number 16: iplot-A
###################################################
seqiplot(mvad.seq, border=NA, with.legend="right")


###################################################
### code chunk number 17: fig_iplot-A
###################################################
seqiplot(mvad.seq, border=NA, with.legend="right")


###################################################
### code chunk number 18: iplot-Aa
###################################################
seqiplot(mvad.seq, border=NA, with.legend="right")


###################################################
### code chunk number 19: Iplot-mdscale
###################################################
mvad.lcs <- seqdist(mvad.seq, method="LCS")
mds <- cmdscale(mvad.lcs, k=1)
dref <- seqdist(mvad.seq, refseq=0, method="LCS")


###################################################
### code chunk number 20: Iplot-sorted
###################################################
png(file=paste(graphdir,"png_mvad_seqIplot-sorted.png",sep=""), unit="px", width=1600, height=700, pointsize=30)

par(mfrow=c(1,3))
seqIplot(mvad.seq, main="unsorted", with.legend=FALSE)
seqIplot(mvad.seq, sortv=dref, main="by distance to most frequent", with.legend=FALSE)
seqIplot(mvad.seq, sortv=mds, main="by 1st MDS factor", with.legend=FALSE)
dev.off()


###################################################
### code chunk number 21: seqtab
###################################################
seqtab(mvad.seq, idxs=1:4)


###################################################
### code chunk number 22: reducing_width
###################################################
oopt <- options(width=60)


###################################################
### code chunk number 23: seqfplot
###################################################
par(mfrow=c(1,2))
seqfplot(mvad.seq, border=NA, with.legend=FALSE,
	main="Weighted frequencies")
seqfplot(mvad.seq, weighted=FALSE, border=NA, with.legend=FALSE,
	main="Unweighted frequencies")


###################################################
### code chunk number 24: resetting_width
###################################################
options(oopt)


###################################################
### code chunk number 25: fig-seqfplot
###################################################
par(mfrow=c(1,2))
seqfplot(mvad.seq, border=NA, with.legend=FALSE,
	main="Weighted frequencies")
seqfplot(mvad.seq, weighted=FALSE, border=NA, with.legend=FALSE,
	main="Unweighted frequencies")


###################################################
### code chunk number 26: plot-mean-time
###################################################
seqmtplot(mvad.seq, group=mvad$funemp, ylim=c(0,30))


###################################################
### code chunk number 27: fig_plot-mean-time
###################################################
seqmtplot(mvad.seq, group=mvad$funemp, ylim=c(0,30))


###################################################
### code chunk number 28: mean-time
###################################################
by(mvad.seq, mvad$funemp, seqmeant)


###################################################
### code chunk number 29: seqtrate
###################################################
mvad.trate <- seqtrate(mvad.seq)
round(mvad.trate,2)


###################################################
### code chunk number 30: seqstatd
###################################################
seqstatd(mvad.seq[,1:8])


###################################################
### code chunk number 31: seqdplot-mvad
###################################################
seqdplot(mvad.seq, group=mvad$gcse5eq, border=NA)


###################################################
### code chunk number 32: seqdplot-mvad-fig
###################################################
seqdplot(mvad.seq, group=mvad$gcse5eq, border=NA)


###################################################
### code chunk number 33: seqmodst
###################################################
seqmsplot(mvad.seq, group=mvad$gcse5eq, border=NA)


###################################################
### code chunk number 34: seqmsplot
###################################################
seqmsplot(mvad.seq, group=mvad$gcse5eq, border=NA)


###################################################
### code chunk number 35: trans-entropy
###################################################
seqHtplot(mvad.seq, group=mvad$gcse5eq)


###################################################
### code chunk number 36: hist-trans-entropy
###################################################
seqHtplot(mvad.seq, group=mvad$gcse5eq)


###################################################
### code chunk number 37: data-ex1
###################################################
e1m1 <- c("A","A","A","A","A","A","A","A","A","A","A","A")

## 2 states
e2m1 <- c("A","A","A","A","A","A","B","B","B","B","B","B")
e2m2 <- c("A","B","A","B","A","B","A","B","A","B","A","B")
e2m3 <- c("A","A","A","A","A","A","A","A","A","A","A","B")
e2m4 <- c("A","B","B","B","B","B","B","B","B","B","B","A")
e2m5 <- c("A","A","A","A","B","B","B","B","A","A","A","A")
e2m7 <- c("A","A","B","B","A","A","B","B","A","A","B","B")
e2m8 <- c("A","B","B","A","A","B","B","A","A","B","B","A")
e2m9 <- c("A","B","A","A","A","A","A","A","A","A","B","A")
e2m10 <- c("A","B","A","B","A","B","A","B","A","A","A","A")

## 3 states
e3m1 <- c("A","A","A","A","B","B","B","B","C","C","C","C")
e3m2 <- c("A","B","C","A","B","C","A","B","C","A","B","C")
e3m3 <- c("A","A","A","A","A","A","A","A","A","A","B","C")
e3m4 <- c("A","B","B","B","B","B","C","C","C","C","C","A")
e3m5 <- c("A","A","A","A","A","B","C","A","A","A","A","A")
e3m6 <- c("A","B","C","B","A","C","A","C","B","C","B","A")
e3m7 <- c("A","A","B","B","C","C","A","A","B","B","C","C")
e3m8 <- c("A","B","C","C","B","A","A","B","C","C","B","A")
e3m9 <- c("A","B","C","A","A","A","A","A","A","C","B","A")
e3m10 <- c("A","B","C","A","B","C","A","B","A","A","A","A")

## 4 states
e4m1 <- c("A","A","A","B","B","B","C","C","C","D","D","D")
e4m2 <- c("A","B","C","D","A","B","C","D","A","B","C","D")
e4m3 <- c("A","A","A","A","A","A","A","A","A","B","C","D")
e4m4 <- c("A","B","B","B","C","C","C","C","D","D","D","A")
e4m5 <- c("A","A","A","A","B","C","D","A","A","A","A","A")
e4m6 <- c("A","B","C","D","B","A","C","D","A","C","B","D")
e4m7 <- c("A","A","B","B","C","C","D","D","A","A","B","B")
e4m8 <- c("A","B","C","D","D","C","B","A","A","B","C","D")
e4m9 <- c("A","B","C","D","A","A","A","A","D","C","B","A")
e4m10 <- c("A","B","C","D","A","B","C","D","A","A","A","A")

##
ex.comp <- rbind(e1m1,
	e2m3,e2m1,e2m5,e2m4,e2m9,e2m10,e2m2,
	e3m3,e3m1,e3m5,e3m4,e3m9,e3m10,e3m6,e3m2,
	e4m3,e4m1,e4m5,e4m4,e4m9,e4m10,e4m6,e4m2)

## we keep sequence names to identify them
seqnames <- rownames(ex.comp)

ex.comp <- ex.comp[c(1,2,3,4,8,17,18,19,24),]

ex.comp <- seqdef(ex.comp, id="auto")


###################################################
### code chunk number 38: fig_comp_ex
###################################################
par(mfrow=c(1,2))
seqiplot(ex.comp, border=NA, with.legend=FALSE, idxs=0, main="Example sequences")
ex.comp.indic <- cbind(seqtransn(ex.comp, norm=TRUE), seqient(ex.comp), seqST(ex.comp)/max(seqST(ex.comp)), seqici(ex.comp))
barplot(t(ex.comp.indic),
 col=c("red","blue","cyan", "yellow"), horiz=TRUE, beside=TRUE,
 ## xlim=c(0,0.4),
 main="Longitudinal characteristics")
legend(x="bottomright", lwd=6,
	legend=c("Transitions", "Entropy", "Turbulence", "Complexity"),
	col=c("red","blue","cyan", "yellow")
)


###################################################
### code chunk number 39: actcal-seqistatd
###################################################
seqistatd(mvad.seq[1:4,])


###################################################
### code chunk number 40: seqient-mvad
###################################################
mvad.ient <- seqient(mvad.seq)


###################################################
### code chunk number 41: comp-metrics
###################################################
scost <- seqsubm(mvad.seq, method = "TRATE")
mvad.om.ref <- seqdist(mvad.seq, refseq=0, method="OM", sm=scost)
mvad.lcs.ref <- seqdist(mvad.seq, refseq=0, method="LCS")
mvad.lcp.ref <- seqdist(mvad.seq, refseq=0, method="LCP")
mvad.dhd.ref <- seqdist(mvad.seq, refseq=0, method="DHD")
mvad.ham.ref <- seqdist(mvad.seq, refseq=0, method="HAM")


###################################################
### code chunk number 42: scost
###################################################
scost <- seqsubm(mvad.seq, method="TRATE")
round(scost,3)


###################################################
### code chunk number 43: DHD-HAM
###################################################
dhd.vs.ham <- abs(mvad.dhd.ref-(4*mvad.ham.ref))


###################################################
### code chunk number 44: fig_metrics_mvad
###################################################
mvad.metrics <- data.frame(LCP=mvad.lcp.ref, LCS=mvad.lcs.ref, OM=mvad.om.ref,
HAM=mvad.ham.ref, DHD=mvad.dhd.ref)

pairs(mvad.metrics, col="blue", pch=20, ps=0.1)


###################################################
### code chunk number 45: OM-mvad
###################################################
mvad.om <- seqdist(mvad.seq, method="OM", indel=1, sm=scost)


###################################################
### code chunk number 46: LCS-OM
###################################################
LCS.vs.OM <- abs(mvad.lcs.ref-mvad.om.ref)


###################################################
### code chunk number 47: OM-maxdist
###################################################
D.max <- 70*min(2, max(scost))


###################################################
### code chunk number 48: seqrep-mvad
###################################################
medoid <- seqrep(mvad.seq, diss=mvad.om, criterion="dist", nrep=1)
print(medoid, format="SPS")


###################################################
### code chunk number 49: seqrplot-mvad-density
###################################################
seqrplot(mvad.seq, group=mvad$gcse5eq, diss=mvad.om, border=NA)


###################################################
### code chunk number 50: fg-seqrplot-mvad
###################################################
seqrplot(mvad.seq, group=mvad$gcse5eq, diss=mvad.om, border=NA)


###################################################
### code chunk number 51: dendro-mvad-om
###################################################
plot(clusterward, which.plots=2, labels=FALSE)


###################################################
### code chunk number 52: fig_cluster-mvad-om
###################################################
plot(clusterward, which.plots=2, labels=FALSE)


###################################################
### code chunk number 53: mvad-clust-viz-rplot
###################################################
seqrplot(mvad.seq, group=cl4.lab, diss=mvad.om, coverage=.35, border=NA)


###################################################
### code chunk number 54: fig_cluster-seqrplot
###################################################
seqrplot(mvad.seq, group=cl4.lab, diss=mvad.om, coverage=.35, border=NA)


###################################################
### code chunk number 55: mvad-cl4-logreg
###################################################
mb4 <- (cl4.lab=="Cluster 4")
glm.cl4 <- glm(mb4 ~ male + funemp + gcse5eq, data=mvad, family="binomial")


###################################################
### code chunk number 56: mvad-prep-ORtable
###################################################
glm.cl1 <- glm((cl4.lab=="Cluster 1") ~ male + funemp + gcse5eq, data=mvad, family="binomial")
glm.cl2 <- glm((cl4.lab=="Cluster 2") ~ male + funemp + gcse5eq, data=mvad, family="binomial")
glm.cl3 <- glm((cl4.lab=="Cluster 3") ~ male + funemp + gcse5eq, data=mvad, family="binomial")
cl1.coeff <- as.data.frame(summary(glm.cl1)$coefficients)
cl2.coeff <- as.data.frame(summary(glm.cl2)$coefficients)
cl3.coeff <- as.data.frame(summary(glm.cl3)$coefficients)
cl4.coeff <- as.data.frame(summary(glm.cl4)$coefficients)
OR.table <- data.frame(Cluster1 = exp(cl1.coeff$Estimate), sig1 = cl1.coeff[,"Pr(>|z|)"],
                       Cluster2 = exp(cl2.coeff$Estimate), sig2 = cl2.coeff[,"Pr(>|z|)"],
                       Cluster3 = exp(cl3.coeff$Estimate), sig3 = cl3.coeff[,"Pr(>|z|)"],
                       Cluster4 = exp(cl4.coeff$Estimate), sig4 = cl4.coeff[,"Pr(>|z|)"])


###################################################
### code chunk number 57: mvad-OR-table
###################################################
OR.xtable <- xtable(OR.table, digits=3, align="|r|rrrrrrrr|")
rownames(OR.xtable) <- c("(Constant)", "Male", "Father unemployed", "Good end cs grade")
colnames(OR.xtable) <- c("Clust 1", "Sig", "Clust 2", "Sig", "Clust 3", "Sig", "Clust 4", "Sig")
##print(OR.xtable, floating=FALSE, caption.placement="top", hline.after=c(-1,0,0,nrow(OR.xtable)))
print(OR.xtable, floating=FALSE, caption.placement="top")

Try the TraMineR package in your browser

Any scripts or data that you put into this service are public.

TraMineR documentation built on Jan. 9, 2024, 3:02 p.m.