#' Calculate Euclidean distances between documents
#'
#' @description Computes standardised Euclidean distances between documents from their topic proportions, draws a dendrogram of document clusters and a network plot of document-document relationships, and writes a graphml file to open with Gephi. For use with JSTOR's Data for Research datasets (http://dfr.jstor.org/).
#' @param lda the object returned by the function JSTOR_lda. Its first element is used as the table of topic proportions; columns named "ID" and "year" are excluded from the distance calculation, and "ID" supplies the dendrogram labels.
#' @return The document-by-document distance matrix after thresholding (entries greater than the row minimum plus the row standard deviation are set to zero). As side effects, the dendrogram and the network plot are drawn on the active graphics device and the file "docs.graphml" is written to the working directory.
#' @examples
#' ## JSTOR_lda_docdists(lda = lda150)
#' @import cluster igraph
JSTOR_lda_docdists <- function(lda) {
  # unpack output from JSTOR_lda
  topic.props <- lda[[1]]

  # if want to take logs, so adjust zeros to avoid -Inf
  # topic.props[topic.props == 0] <- 0.0000000000001

  #### Euclidean distance matrix on topics
  # Only the topic columns take part; ID and year are metadata.
  topic.cols <- !(colnames(topic.props) %in% c("ID", "year"))
  dists.full <- as.matrix(
    daisy(topic.props[, topic.cols], metric = "euclidean", stand = TRUE)
  )

  # Change row values to zero if greater than row minimum plus row standard
  # deviation. This is how Jockers subsets the distance matrix to keep only
  # closely related documents and avoid a dense spaghetti diagram that's
  # difficult to interpret (hat-tip: http://stackoverflow.com/a/16047196/1036500)
  # NOTE(review): the diagonal of a distance matrix is zero, so the row
  # minimum is presumably always 0 and the cutoff reduces to the row sd.
  row.cutoff <- apply(dists.full, 1, min) + apply(dists.full, 1, sd)
  dists.sparse <- dists.full
  dists.sparse[sweep(dists.full, 1, row.cutoff) > 0] <- 0

  ## dendrogram (built from the full, unthresholded distances)
  plot(
    hclust(dist(dists.full)),
    xlab = "document clusters", sub = "", main = "",
    labels = topic.props$ID, cex = 0.1
  )

  ## network plot (built from the thresholded distances)
  g <- as.undirected(graph.adjacency(dists.sparse))
  layout1 <- layout.fruchterman.reingold(g, niter = 500)
  plot(
    g,
    layout = layout1, edge.curved = TRUE, vertex.size = 1,
    vertex.color = "grey", edge.arrow.size = 0.1,
    vertex.label.dist = 0.5, vertex.label = NA
  )

  # Export the graph for Gephi. This has to run before the function returns:
  # previously an early return() made the export and the message unreachable.
  write.graph(g, file = "docs.graphml", format = "graphml")
  message("The docs.graphml file for Gephi can be found in ", getwd())

  dists.sparse
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.