View source: R/dissimilarity.R
Dissimilarity | R Documentation |
Uses the distance function to calculate dissimilarity statistics by grouping variables.
Dissimilarity(
text.var,
grouping.var = NULL,
method = "prop",
diag = FALSE,
upper = FALSE,
p = 2,
...
)
text.var |
A text variable or word frequency matrix object. |
grouping.var |
The grouping variables. Default |
method |
Distance methods (see |
diag |
logical. If |
upper |
logical. If |
p |
The power of the Minkowski distance. |
... |
Other arguments passed to |
Returns a matrix of dissimilarity values (the agreement between text).
dist
## Not run:
with(DATA, Dissimilarity(state, list(sex, adult)))
with(DATA, Dissimilarity(state, person, diag = TRUE))
## Clustering: Dendrogram
(x <- with(pres_debates2012, Dissimilarity(dialogue, list(person, time))))
fit <- hclust(x)
plot(fit)
## draw dendrogram with red borders around the 3 clusters
rect.hclust(fit, k=3, border=c("red", "purple", "seagreen"))
## Clustering: Dendrogram with p.values
library(pvclust)
wfm.mod <- with(pres_debates2012, wfm(dialogue, list(person, time)))
fit <- suppressMessages(pvclust(wfm.mod, method.hclust="ward",
method.dist="euclidean"))
plot(fit)
pvrect(fit, alpha=.95)
## Multidimentional Scaling
## Based on blog post from Bodong Chen
## http://bodongchen.com/blog/?p=301
## Fit it: 2-D
(diss <- with(pres_debates2012, Dissimilarity(dialogue, list(person, time),
method = "euclidean")))
fit <- cmdscale(diss, eig = TRUE, k = 2)
## Plot it 2-D
points <- data.frame(x = fit$points[, 1], y = fit$points[, 2])
ggplot(points, aes(x = x, y = y)) +
geom_point(data = points, aes(x = x, y = y, color = rownames(points))) +
geom_text(data = points, aes(x = x, y = y - 0.2, label = row.names(points)))
## Fit it: 3-D
library(scatterplot3d)
fit <- cmdscale(diss, eig = TRUE, k = 3)
points <- data.frame(colSplit(names(fit$points[, 1])))
library(qdapTools)
points$colors <- points$X1 %l% data.frame(levels(points$X1),
qcv(yellow, yellow, blue, yellow, red, yellow))
points$shape <- points$X2 %l% data.frame(levels(points$X2), c(15, 17, 19))
## Plot it: 3-D
scatterplot3d(fit$points[, 1], fit$points[, 2], fit$points[, 3],
color = points$colors, pch = points$shape,
main = "Semantic Space Scaled to 3D", xlab = "x", ylab = "y",
zlab = "z", type = "h")
legend("bottomright", title="Person",
qcv(Obama, Romney, Other), fill=qcv(blue, red, yellow))
legend("topleft", paste("Time", 1:3), pch=c(15, 17, 19))
## Compare to Cosine Similarity
cos_sim <- function(x, y) x %*% y / sqrt(x%*%x * y%*%y)
mat <- matrix(rbinom(500, 0:1, .45), ncol=10)
v_outer(mat, cos_sim)
v_outer(with(DATA, wfm(state, person)), cos_sim)
with(DATA, Dissimilarity(state, person))
## End(Not run)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.