Nothing
# Author: ianfellows
###############################################################################
wordcloud <- function(words,
freq,
scale = c(4, .5),
min.freq = 3,
max.words = Inf,
random.order = TRUE,
random.color = FALSE,
rot.per = .1,
colors = "black",
ordered.colors = FALSE,
use.r.layout = FALSE,
fixed.asp = TRUE,
...) {
if (!fixed.asp && rot.per > 0)
stop("Variable aspect ratio not supported for rotated words. Set rot.per=0.")
tails <- "g|j|p|q|y"
last <- 1
nc <- length(colors)
if (missing(freq)) {
requireNamespace("tm")
requireNamespace("slam")
#if(!require("tm"))
# stop("freq must either be non-missing, or the tm package must be available")
if (is.character(words) || is.factor(words)) {
corpus <- tm::Corpus(tm::VectorSource(words))
corpus <- tm::tm_map(corpus, tm::removePunctuation)
corpus <-
tm::tm_map(corpus, function(x)
tm::removeWords(x, tm::stopwords()))
} else
corpus <- words
tdm <- tm::TermDocumentMatrix(corpus)
freq <- slam::row_sums(tdm)
words <- names(freq)
}
if (ordered.colors) {
if (length(colors) != 1 && length(colors) != length(words)) {
stop(paste("Length of colors does not match length of words",
"vector"))
}
}
if (min.freq > max(freq))
min.freq <- 0
overlap <- function(x1, y1, sw1, sh1) {
if (!use.r.layout)
return(is_overlap(x1, y1, sw1, sh1, boxes))
s <- 0
if (length(boxes) == 0)
return(FALSE)
for (i in c(last, 1:length(boxes))) {
bnds <- boxes[[i]]
x2 <- bnds[1]
y2 <- bnds[2]
sw2 <- bnds[3]
sh2 <- bnds[4]
if (x1 < x2)
overlap <- x1 + sw1 > x2 - s
else
overlap <- x2 + sw2 > x1 - s
if (y1 < y2)
overlap <- overlap && (y1 + sh1 > y2 - s)
else
overlap <- overlap && (y2 + sh2 > y1 - s)
if (overlap) {
last <<- i
return(TRUE)
}
}
FALSE
}
ord <- rank(-freq, ties.method = "random")
words <- words[ord <= max.words]
freq <- freq[ord <= max.words]
if (ordered.colors) {
colors <- colors[ord <= max.words]
}
if (random.order)
ord <- sample.int(length(words))
else
ord <- order(freq, decreasing = TRUE)
words <- words[ord]
freq <- freq[ord]
words <- words[freq >= min.freq]
freq <- freq[freq >= min.freq]
if (ordered.colors) {
colors <- colors[ord][freq >= min.freq]
}
thetaStep <- .1
rStep <- .05
plot.new()
op <- par("mar")
par(mar = c(0, 0, 0, 0))
if (fixed.asp)
plot.window(c(0, 1), c(0, 1), asp = 1)
else
plot.window(c(0, 1), c(0, 1))
normedFreq <- freq / max(freq)
size <- (scale[1] - scale[2]) * normedFreq + scale[2]
boxes <- list()
for (i in 1:length(words)) {
rotWord <- runif(1) < rot.per
r <- 0
theta <- runif(1, 0, 2 * pi)
x1 <- .5
y1 <- .5
wid <- strwidth(words[i], cex = size[i], ...)
ht <- strheight(words[i], cex = size[i], ...)
#mind your ps and qs
if (grepl(tails, words[i]))
ht <- ht + ht * .2
if (rotWord) {
tmp <- ht
ht <- wid
wid <- tmp
}
isOverlaped <- TRUE
while (isOverlaped) {
if (!overlap(x1 - .5 * wid, y1 - .5 * ht, wid, ht) &&
x1 - .5 * wid > 0 && y1 - .5 * ht > 0 &&
x1 + .5 * wid < 1 && y1 + .5 * ht < 1) {
if (!random.color) {
if (ordered.colors) {
cc <- colors[i]
}
else {
cc <- ceiling(nc * normedFreq[i])
cc <- colors[cc]
}
} else {
cc <- colors[sample(1:nc, 1)]
}
text(
x1,
y1,
words[i],
cex = size[i],
offset = 0,
srt = rotWord * 90,
col = cc,
...
)
#rect(x1-.5*wid,y1-.5*ht,x1+.5*wid,y1+.5*ht)
boxes[[length(boxes) + 1]] <- c(x1 - .5 * wid, y1 - .5 * ht, wid, ht)
isOverlaped <- FALSE
} else{
if (r > sqrt(.5)) {
warning(paste(words[i],
"could not be fit on page. It will not be plotted."))
isOverlaped <- FALSE
}
theta <- theta + thetaStep
r <- r + rStep * thetaStep / (2 * pi)
x1 <- .5 + r * cos(theta)
y1 <- .5 + r * sin(theta)
}
}
}
par(mar = op)
invisible()
}
#Call down to c++ to find out if any overplotting would occur
#.overlap <- function(x11,y11,sw11,sh11,boxes1){
# .Call("is_overlap",x11,y11,sw11,sh11,boxes1)
#}
#a word cloud showing the common words among documents
commonality.cloud <-
function(term.matrix,
comonality.measure = min,
max.words = 300,
...) {
ndoc <- ncol(term.matrix)
for (i in 1:ndoc) {
term.matrix[, i] <- term.matrix[, i] / sum(term.matrix[, i])
}
freq <- apply(term.matrix, 1, function(x)
comonality.measure(x))
freq <- freq + min(freq)
wordcloud(rownames(term.matrix)[freq > 0],
freq[freq > 0],
min.freq = 0,
max.words = max.words,
...)
}
#a cloud comparing the frequencies of words across documents
comparison.cloud <-
function(term.matrix,
scale = c(4, .5),
max.words = 300,
random.order = FALSE,
rot.per = .1,
colors = brewer.pal(max(3, ncol(term.matrix)), "Dark2"),
use.r.layout = FALSE,
title.size = 3,
title.colors = NULL,
match.colors = FALSE,
title.bg.colors = "grey90",
...) {
ndoc <- ncol(term.matrix)
thetaBins <- seq(from = 0,
to = 2 * pi,
length = ndoc + 1)
for (i in 1:ndoc) {
term.matrix[, i] <- term.matrix[, i] / sum(term.matrix[, i])
}
mean.rates <- rowMeans(term.matrix)
for (i in 1:ndoc) {
term.matrix[, i] <- term.matrix[, i] - mean.rates
}
group <- apply(term.matrix, 1, function(x)
which.max(x))
words <- rownames(term.matrix)
freq <- apply(term.matrix, 1, function(x)
max(x))
tails <- "g|j|p|q|y"
last <- 1
nc <- length(colors)
overlap <- function(x1, y1, sw1, sh1) {
if (!use.r.layout)
return(is_overlap(x1, y1, sw1, sh1, boxes))
s <- 0
if (length(boxes) == 0)
return(FALSE)
for (i in c(last, 1:length(boxes))) {
bnds <- boxes[[i]]
x2 <- bnds[1]
y2 <- bnds[2]
sw2 <- bnds[3]
sh2 <- bnds[4]
if (x1 < x2)
overlap <- x1 + sw1 > x2 - s
else
overlap <- x2 + sw2 > x1 - s
if (y1 < y2)
overlap <- overlap && (y1 + sh1 > y2 - s)
else
overlap <- overlap && (y2 + sh2 > y1 - s)
if (overlap) {
last <<- i
return(TRUE)
}
}
FALSE
}
ord <- rank(-freq, ties.method = "random")
words <- words[ord <= max.words]
freq <- freq[ord <= max.words]
group <- group[ord <= max.words]
if (random.order) {
ord <- sample.int(length(words))
} else{
ord <- order(freq, decreasing = TRUE)
}
words <- words[ord]
freq <- freq[ord]
group <- group[ord]
thetaStep <- .05
rStep <- .05
plot.new()
op <- par("mar")
par(mar = c(0, 0, 0, 0))
plot.window(c(0, 1), c(0, 1), asp = 1)
normedFreq <- freq / max(freq)
size <- (scale[1] - scale[2]) * normedFreq + scale[2]
boxes <- list()
#add titles
docnames <- colnames(term.matrix)
if (!is.null(title.colors)) {
title.colors <- rep(title.colors, length.out = ndoc)
}
title.bg.colors <- rep(title.bg.colors, length.out = ndoc)
for (i in 1:ndoc) {
th <- mean(thetaBins[i:(i + 1)])
word <- docnames[i]
wid <- strwidth(word, cex = title.size) * 1.2
ht <- strheight(word, cex = title.size) * 1.2
x1 <- .5 + .45 * cos(th)
y1 <- .5 + .45 * sin(th)
rect(x1 - .5 * wid,
y1 - .5 * ht,
x1 + .5 * wid,
y1 + .5 * ht,
col = title.bg.colors[i],
border = "transparent")
if (is.null(title.colors)) {
if (match.colors) {
text(x1, y1, word, cex = title.size, col = colors[i])
} else{
text(x1, y1, word, cex = title.size)
}
} else{
text(x1, y1, word, cex = title.size, col = title.colors[i])
}
boxes[[length(boxes) + 1]] <- c(x1 - .5 * wid, y1 - .5 * ht, wid, ht)
}
for (i in 1:length(words)) {
rotWord <- runif(1) < rot.per
r <- 0
theta <- runif(1, 0, 2 * pi)
x1 <- .5
y1 <- .5
wid <- strwidth(words[i], cex = size[i], ...)
ht <- strheight(words[i], cex = size[i], ...)
#mind your ps and qs
if (grepl(tails, words[i]))
ht <- ht + ht * .2
if (rotWord) {
tmp <- ht
ht <- wid
wid <- tmp
}
isOverlaped <- TRUE
while (isOverlaped) {
inCorrectRegion <-
theta > thetaBins[group[i]] && theta < thetaBins[group[i] + 1]
if (inCorrectRegion && !overlap(x1 - .5 * wid, y1 - .5 * ht, wid, ht) &&
x1 - .5 * wid > 0 && y1 - .5 * ht > 0 &&
x1 + .5 * wid < 1 && y1 + .5 * ht < 1) {
text(
x1,
y1,
words[i],
cex = size[i],
offset = 0,
srt = rotWord * 90,
col = colors[group[i]],
...
)
#rect(x1-.5*wid,y1-.5*ht,x1+.5*wid,y1+.5*ht)
boxes[[length(boxes) + 1]] <- c(x1 - .5 * wid, y1 - .5 * ht, wid, ht)
isOverlaped <- FALSE
} else{
if (r > sqrt(.5)) {
warning(paste(words[i],
"could not be fit on page. It will not be plotted."))
isOverlaped <- FALSE
}
theta <- theta + thetaStep
if (theta > 2 * pi)
theta <- theta - 2 * pi
r <- r + rStep * thetaStep / (2 * pi)
x1 <- .5 + r * cos(theta)
y1 <- .5 + r * sin(theta)
}
}
}
par(mar = op)
invisible()
}
wordlayout <- function(x,
y,
words,
cex = 1,
rotate90 = FALSE,
xlim = c(-Inf, Inf),
ylim = c(-Inf, Inf),
tstep = .1,
rstep = .1,
...) {
tails <- "g|j|p|q|y"
n <- length(words)
sdx <- sd(x, na.rm = TRUE)
sdy <- sd(y, na.rm = TRUE)
if (sdx == 0)
sdx <- 1
if (sdy == 0)
sdy <- 1
if (length(cex) == 1)
cex <- rep(cex, n)
if (length(rotate90) == 1)
rotate90 <- rep(rotate90, n)
boxes <- list()
for (i in 1:length(words)) {
rotWord <- rotate90[i]
r <- 0
theta <- runif(1, 0, 2 * pi)
x1 <- xo <- x[i]
y1 <- yo <- y[i]
wid <- strwidth(words[i], cex = cex[i], ...)
ht <- strheight(words[i], cex = cex[i], ...)
#mind your ps and qs
if (grepl(tails, words[i]))
ht <- ht + ht * .2
if (rotWord) {
tmp <- ht
ht <- wid
wid <- tmp
}
isOverlaped <- TRUE
while (isOverlaped) {
if (!is_overlap(x1 - .5 * wid, y1 - .5 * ht, wid, ht, boxes) &&
x1 - .5 * wid > xlim[1] && y1 - .5 * ht > ylim[1] &&
x1 + .5 * wid < xlim[2] && y1 + .5 * ht < ylim[2]) {
boxes[[length(boxes) + 1]] <- c(x1 - .5 * wid, y1 - .5 * ht, wid, ht)
isOverlaped <- FALSE
} else{
theta <- theta + tstep
r <- r + rstep * tstep / (2 * pi)
x1 <- xo + sdx * r * cos(theta)
y1 <- yo + sdy * r * sin(theta)
}
}
}
result <- do.call(rbind, boxes)
colnames(result) <- c("x", "y", "width", "ht")
rownames(result) <- words
result
}
textplot <-
function(x,
y,
words,
cex = 1,
new = TRUE,
show.lines = TRUE,
...) {
if (new)
plot(x, y, type = "n", ...)
lay <- wordlayout(x, y, words, cex, ...)
if (show.lines) {
for (i in 1:length(x)) {
xl <- lay[i, 1]
yl <- lay[i, 2]
w <- lay[i, 3]
h <- lay[i, 4]
if (x[i] < xl || x[i] > xl + w ||
y[i] < yl || y[i] > yl + h) {
points(x[i],
y[i],
pch = 16,
col = "red",
cex = .5)
nx <- xl + .5 * w
ny <- yl + .5 * h
lines(c(x[i], nx), c(y[i], ny), col = "grey")
}
}
}
text(lay[, 1] + .5 * lay[, 3], lay[, 2] + .5 * lay[, 4], words, cex = cex, ...)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.