Nothing
# - - - - - - - - - - - - - - - - - -
# demo: social network analysis
# - - - - - - - - - - - - - - - - - -
# 1) create a demo data set
im = matrix(0, nrow=9, ncol=12)
rownames(im) = c("Paul", "Joanna", "Maximilian", "Peter", "Christina", "Simon", "Ida", "Thomas", "Alba")
colnames(im) = c("OU-CS", "UR-Informatik", "MOOC-PED", "MOOC-TEL", "MOOC-Math", "OU-PED", "MOOC-ocTEL",
"MOOC-LAK", "OU-Statistics", "Facebook-Statistics", "Facebook-TEL", "Linkedin-CS")
im[1, ] = c(1,1,0,0,0,0,0,0,0,0,0,1) # paul
im[2, ] = c(0,0,1,0,0,1,0,0,0,0,1,0) # joanna
im[3, ] = c(0,0,0,0,1,0,0,0,0,1,0,0) # max
im[4, ] = c(0,0,0,1,0,1,1,1,0,0,0,0) # peter
im[5, ] = c(0,0,1,1,0,1,1,1,0,0,0,0) # christina
im[6, ] = c(0,1,0,0,0,0,0,0,0,0,0,1) # simon
im[7, ] = c(0,0,0,0,0,0,0,0,1,1,0,0) # ida
im[8, ] = c(0,0,0,0,0,0,0,0,1,1,0,0) # thomas
im[9, ] = c(0,0,0,1,0,0,1,1,0,0,0,0) # alba
# 2) plot sociograms
par(mar=c(3,2,3,2))
par(mfrow=c(2,2))
par(cex.lab=0.6)
plot(network::network(im%*%t(im)), displaylabels=T, vertex.cex=2, main="Affiliations") # three communities
colors = c(rep("red",nrow(im)),rep("white",ncol(im)))
plot(network::network(im), displaylabels=T, vertex.col=colors, vertex.cex=2, main="Incidences") # the communities are: TEL, CS, Math
# 3) manipulate data: merge courses
TEL = c( "OU-CS", "UR-Informatik", "Linkedin-CS", "MOOC-PED","OU-PED", "MOOC-TEL","MOOC-ocTEL","MOOC-LAK", "Facebook-TEL")
STATS = c("MOOC-Math", "OU-Statistics", "Facebook-Statistics")
im_new = cbind(
rowSums( im[, TEL] ),
rowSums( im[, STATS] )
)
colnames(im_new) = c("ALL-TEL", "ALL-STATS")
# 4) plot sociograms (again)
plot(network::network(im_new %*% t(im_new), directed=FALSE), displaylabels=TRUE, vertex.cex=2, main="Affiliations (manipulated)")
colors = c(rep("red",nrow(im)),rep("white",ncol(im)))
plot(network::network(im_new>0, directed=FALSE), displaylabels=TRUE, vertex.col=colors, vertex.cex=2, main="Incidences (manipulated)")
# - - - - - - - - - - - - - - - - - -
# demo: latent semantic space
# - - - - - - - - - - - - - - - - - -
# create a demo data set
docs = matrix(nrow=0, ncol=2)
colnames(docs) = c("id", "title")
docs = rbind( docs, c("c1", "a web interface for social media applications")) # web social interface
docs = rbind( docs, c("c2", "Review of access time restrictions on web system usage")) # web access review system time user
docs = rbind( docs, c("c3", "Content management system usage of the HTML 5 interface")) # interface system html management
docs = rbind( docs, c("c4", "Error spotting in HTML: social system versus software system")) # social 2x system html
docs = rbind( docs, c("c5", "Barriers to access and time spent in social mobile apps")) # access time social
docs = rbind( docs, c("m1", "The generation of random unordered trees")) # trees
docs = rbind( docs, c("m2", "A survey of divisive clustering along the intersection of partial trees")) # trees clustering intersection
docs = rbind( docs, c("m3", "Width and height of trees in using agglomerative clustering with Agnes")) # trees clustering agglomerative
docs = rbind( docs, c("m4", "Agglomerative clustering algorithms: a review")) # clustering agglomerative review
docs = rbind( docs, c("p1", "The intersection of learning and organisational knowledge sharing"))
docs = rbind( docs, c("p2", "A transactional perspective on teaching and learning"))
docs = rbind( docs, c("p3", "Innovations in online learning: moving beyond no significant difference"))
docs = rbind( docs, c("p4", "Tacit knowledge management in organisational learning"))
docs = rbind( docs, c("p5", "Knowledge building: theory, pedagogy, and technology"))
# create doc term matrix
docs2 = docs[,2]
docs2 = tolower(docs2)
docs2 = gsub("[^[:alnum:]]", " ", docs2)
docs2 = gsub("[[:space:]]+", " ", docs2)
docs2 = lapply(docs2, function (e) { unlist(strsplit(e, " ")) })
data(lsa::stopwords_en)
docs2 = lapply(docs2, function (e) { e[! (e %in% lsa::stopwords_en) ] })
tabs = lapply(docs2, function(e){sort(table(e), dec=T)})
tabs2 = lapply(tabs, function(e) { data.frame(docs = "", terms = names(e), Freq = e, row.names = NULL) })
for (i in 1:nrow(docs)) { tabs2[[i]][,1]= docs[i,1] }
dtm = t(stats::xtabs(Freq ~ ., data = do.call("rbind", tabs2)))
dtm = dtm[-which(rowSums(dtm)<=1),] # at least in more than a single document
dtm
# create lsa space with 3 dimensions
space = lsa::lsa(dtm, dims=3)
dtm2 = lsa::as.textmatrix(space)
class(dtm2) = c("matrix", "textmatrix")
# get a bit of info on which cells are properly filled (>=0.3)
round(((dtm2>=0.3) * 1) * dtm2,1)
# measure proximity between docs
prox = cosine(dtm2)
round(prox) # shows 3 clear clusters
# compare with original proximity in pure vector space
cosine(dtm)
# visualise difference between these tables
par(mfrow=c(1,2))
par(mar=c(2,2,1,1))
graphics::image(round(1-cosine(dtm),1), col=gray(1:10/10), xaxt="n", yaxt="n")
graphics::axis(1, at=seq(0/ncol(dtm), 1, by=1/(ncol(dtm)-1))[1:ncol(dtm)], labels=colnames(dtm), cex.axis=0.7)
graphics::axis(2, at=seq(0/ncol(dtm), 1, by=1/(ncol(dtm)-1))[1:ncol(dtm)], labels=colnames(dtm), cex.axis=0.7)
graphics::image(round(1-cosine(dtm2),1), col=gray(1:10/10), xaxt="n", yaxt="n")
graphics::axis(1, at=seq(0/ncol(dtm2), 1, by=1/(ncol(dtm2)-1))[1:ncol(dtm2)], labels=colnames(dtm2), cex.axis=0.7)
graphics::axis(2, at=seq(0/ncol(dtm2), 1, by=1/(ncol(dtm2)-1))[1:ncol(dtm2)], labels=colnames(dtm2), cex.axis=0.7)
# plot it using the plot proposal from the seminal paper on LSA (Deerwester et al., 1989)
cf = 1
xmax = max(c(space$tk[,1], space$dk[,1]))
xmin = min(c(space$tk[,1], space$dk[,1]))
ymax = max(c(space$tk[,2], space$dk[,2]))
ymin = min(c(space$tk[,2], space$dk[,2]))
zmax = max(c(space$tk[,3], space$dk[,3]))
zmin = min(c(space$tk[,3], space$dk[,3]))
ymin = ymin - 0.05
xmin = xmin - 0.05
zmin = zmin - 0.05
ymax = ymax + 0.05
xmax = xmax + 0.05
zmax = zmax + 0.05
mdisty = (ymax-ymin)*0.02
mdistx = (xmax-xmin)*0.02
mdistz = (zmax-zmin)*0.02
plot(space$tk[,1], space$tk[,2], ylim=c(ymin, ymax), xlim=c(xmin, xmax), xlab="factor 1", ylab="factor 2", pch=1, cex=cf, lwd=cf, cex.axis=cf, cex.lab=cf, col="red")
graphics::points(space$dk[,1], space$dk[,2], pch=2, col="blue")
graphics::text(space$dk[,1]+mdistx, space$dk[,2]-mdisty, labels=rownames(space$dk), col="blue", cex=0.7 )
graphics::text(space$tk[,1]-mdistx, space$tk[,2]+mdisty, labels=rownames(space$tk), col="red", cex=cf )
graphics::abline(h=0, lty="dotted", lwd=cf)
graphics::abline(v=0, lty="dotted", lwd=cf)
# plot with all 3 factors in a perspective plot (and label)
par(mar=c(1,0.1,0.1,0.1))
p = graphics::persp(
x=-1:1,y=-1:1,
z=matrix(
c(
-1,0,1,
-1,0,1,
1,0,-1
),3,3),
col="transparent", border="transparent",
xlim = range(c( space$dk[,1], space$tk[,1])),
ylim = range(c( space$dk[,2], space$tk[,2])),
zlim = range(c( space$dk[,3], space$tk[,3])),
theta=35, phi=20,
xlab="dim 1", ylab="dim 2", zlab="dim 3",
expand=0.5, scale=F,
axes=TRUE, nticks=10, ticktype="simple"
)
graphics::points( grDevices::trans3d(space$dk[,1], space$dk[,2], space$dk[,3], pmat=p), bg="darkgray", col="darkgray", pch=22, cex=1)
graphics::points( grDevices::trans3d(space$tk[,1], space$tk[,2], space$tk[,3], pmat=p), bg="black", col="black", pch=21, cex=1)
graphics::text( grDevices::trans3d(space$tk[,1]-mdistx, space$tk[,2]-mdisty, space$tk[,3]-mdistz, pmat=p), rownames(space$tk), col="black", cex=0.8)
graphics::text( grDevices::trans3d(space$dk[,1]-mdistx, space$dk[,2]-mdisty, space$dk[,3]-mdistz, pmat=p), rownames(space$dk), col="darkgray", cex=0.8)
# fold-in demo
lsa::query("Review of the html user interface of the system", rownames(dtm))
# - - - - - - - - - - - - - - - - - - - - - - - -
# mpia space
dmgr = DomainManager()
id = dmgr$add(space, title="memospace")
d = dmgr$get("memospace")
# create data (binding training description -> person attending)
dmsg = c( 1,2,10,3,6,11,4,7,8,9,12,5 )
cbind( docs[dmsg,1], as.matrix(colnames(im)))
thedocs = cbind( docs[dmsg,1], as.matrix(colnames(im)), docs[dmsg,2])
ppl = HumanResourceManager(domainmanager=dmgr, domain=d)
for (p in rownames(im)) {
assign(tolower(p), ppl$add(name=p))
for (pf in which(im[p,]>0)) {
#get(tolower(p))$write(docs[dmsg,][pf,2], label=docs[dmsg,][pf,1], purpose=colnames(im)[pf])
get(tolower(p))$write(thedocs[pf,3], label=thedocs[pf,1], purpose=colnames(im)[pf])
}
}
thedocs2 = cbind( as.matrix(colnames(im)),docs[dmsg,2] )
rownames(thedocs2) = docs[dmsg,1]
colnames(thedocs2) = c("Training", "Description")
thedocs2 = thedocs2[sort(rownames(thedocs2), index.return=TRUE)$ix, ]
# - - - - - - - - - - - - - - - - - -
# visual analysis: first demo
par(mfrow=c(2,2))
plot(d, method="persp", rotated=TRUE)
par(mar=c(1,1,1,1))
title(main="The MPIA space")
toponymy(d, method="all", add=TRUE, col="black")
plot(d, method="persp", rotated=TRUE)
par(mar=c(1,1,1,1))
title(main="Peter's path (of performance records)")
toponymy(d, method="all", add=TRUE, col="black")
plot(path(peter), col="white")
# two performance records are identical!
peter[1] == peter[3]
plot(d, method="persp", rotated=TRUE)
par(mar=c(1,1,1,1))
title(main="Peter's competences and position")
plot(competences(peter), col="red", component.labels=FALSE, connect=FALSE, alpha=1)
plot(position(peter), col="orange")
plot(d, method="persp", rotated=TRUE)
par(mar=c(1,1,1,1))
title(main="Peter's learning path (in shades of gray)")
pf = path(peter)
cs = grDevices::gray(seq(1,0.5,length.out=length(pf) ))
for (p in 1:length(pf)) {
plot(pf[[p]], col=cs[p])
}
# - - - - - - - - - - - - - - - - - - - - - - - -
# investigate positions and paths
d$visualiser$closeDevice()
d$visualiser$newDevice()
plot.new()
par(mar=c(0,0,0,0))
toponymy(d, method="all", add=FALSE, col="darkgray", grid.col="black")
plot(peter, col="darkgreen")
plot(christina, col="darkgreen")
plot(path(maximilian), col="orange")
plot(maximilian, col="orange")
plot(path(joanna), col="red")
plot(joanna, col="red")
plot(path(alba), col="purple")
plot(alba, col="purple")
plot(ida, col="lightgreen")
plot(simon, col="brown")
proximity(simon, alba)
near(simon, alba)
proximity(simon, joanna)
near(simon, joanna)
near(ppl, christina)
replacements = sapply(near(ppl, christina), names)
round( proximity(ppl)['Christina', replacements], 1 )
overlap(path(christina), path(alba))
overlap(path(christina), path(peter))
overlap(competences(christina), competences(alba))
overlap(competences(christina), competences(peter))
# - - - - - - - - - - - - - - - - - -
# detect potential competences
plot.new()
toponymy(d, method="all", add=FALSE, col="darkgray", grid.col="black")
plot(competences(christina), col="red", connect=FALSE)
plot(competences(peter), col="green", connect=FALSE)
plot(competences(simon), col="yellow", connect=FALSE)
# show cluster dendrogram
ps = performances(ppl)
a = cluster::agnes( proximity(ps), diss=FALSE )
plot(a, which.plots=2, main="Cluster dendrogram over all meaningvectors", xlab="Meaningvectors", sub="", cex.main=1.2)
graphics::abline(h=1-d$identityThreshold, lty="dashed", col="darkgreen", lwd=1.5)
graphics::text(x=11, y=1-d$identityThreshold+0.2, adj=0.5, "cutoff", col="darkgreen", cex=1)
# - - - - - - - - - - - - - - - - - -
# detect groups
plot(d, method="topographic", rotated=FALSE)
toponymy(d, method="all", col="black")
groups(ppl)
# show group positions
plot(position(ppl$groups[[1]]), col="pink")
plot(position(ppl$groups[[5]]), col="red")
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.