tag_co_occurrence: Explore Tag Co-Occurrence

Description Usage Arguments Value Author(s) See Also Examples

View source: R/tag_co_occurrence.R

Description

Explore tag co-occurrence. The resulting list comes with a plot method that allows the user to use a network graph to view the connections between tags as well as the average number of other tags that co-occur with each of the regex tags. This can provide information regarding the discriminatory power of each regex that corresponds to a tag. The plot_upset function can also be used for this sort of exploration.

Usage

tag_co_occurrence(x, ...)

Arguments

x

A term_count object.

...

ignored.

Value

Returns a list of:

ave_tag

A two-column data.frame of tags and, for each tag, the average number of other tags that co-occur with it.

cor

A min-max scaled correlation matrix between tags; diagonals set to 0.

adjacency

An adjacency matrix between tags.

min_max_adjacency

A min-max scaled adjacency matrix between tags; diagonals set to 0.

node_size

The diagonals from the adjacency matrix; the number of times a tag occurred.

Author(s)

Steve T. Simpson and Tyler Rinker <tyler.rinker@gmail.com>.

See Also

plot_upset

Examples

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
## Not run: 
## Example 1
## Build a term list from the first element of the frequent-terms result for
## the debate dialogue, then count those terms to get a term_count model.
regs <- as_term_list(frequent_terms(presidential_debates_2012[["dialogue"]])[[1]])

model <- with(presidential_debates_2012,
    term_count(dialogue, TRUE, regs)
)

x <- tag_co_occurrence(model)
names(x)

## Show every element of the result, rounding the matrix elements to two
## decimals and leaving non-matrix elements untouched.
setNames(
    lapply(names(x), function(a) {
        if (is.matrix(x[[a]])) round(x[[a]], 2) else x[[a]]
    }),
    names(x)
)

## Base-graphics views of the individual elements
heatmap(x[["cor"]])
heatmap(x[["min_max_adjacency"]])
barplot(sort(x[["node_size"]], decreasing = TRUE), las=2)
barplot(setNames(x[["ave_tag"]][[2]], x[["ave_tag"]][[1]]), las=2)

## Plot method for the tag_co_occurrence object with assorted options
plot(x)
plot(x, cor=FALSE)
plot(x, min.edge.cutoff = .1, node.color = "#1CDB4F")
plot(x, min.edge.cutoff = .2, node.color = "gold", digits = 3)
plot(x, point.size.range = c(.5, 8))
plot(x, bar = TRUE)

## Compare to `plot_upset`
## (The whole example is already inside a "Not run" section, so no extra
## Rd `\dontrun{}` wrapper is needed; it is not valid R when pasted.)
plot_upset(model)

##===============================================
## Interactive chord diagram and network graph of
## tag co-occurrence
##===============================================

## Load Required Add-on Packages
## Install pacman only when it is missing. requireNamespace() checks for
## availability without attaching the package; every later use is
## `pacman::`-qualified, so attaching is unnecessary.
if (!requireNamespace("pacman", quietly = TRUE)) install.packages("pacman")
pacman::p_load(igraph, qrage)
pacman::p_load_gh("mattflor/chorddiag", "trinker/textshape")

## Matrix Manipulation Function
##
## Zero the diagonal of a matrix and optionally blank the lower triangle
## and/or reorder rows and columns by ascending row sum. Intended for square
## matrices, since the same permutation is applied to rows and columns.
##
## mat      A numeric matrix (e.g., a tag adjacency matrix).
## rm.lower If TRUE, entries below the diagonal are also set to 0.
## order    If TRUE, rows and columns are permuted together by ascending
##          row sum (computed after the zeroing steps above).
## ...      Unused; accepted so that extra arguments do not raise an error.
##
## Returns the modified matrix.
remove_diags <- function(mat, rm.lower = FALSE, order = TRUE, ...) {
    diag(mat) <- 0
    if (isTRUE(rm.lower)) mat[lower.tri(mat)] <- 0
    if (order) {
        ord <- order(rowSums(mat))
        # drop = FALSE keeps a 1 x 1 input as a matrix instead of letting
        # the subset collapse to a bare scalar.
        mat <- mat[ord, ord, drop = FALSE]
    }
    mat
}

##--------------
## Chord Diagram
##--------------
## Adjacency matrix with the diagonal zeroed and tags reordered by row sum
## (the defaults of remove_diags).
chord_matrix <- remove_diags(x[["adjacency"]])

chorddiag::chorddiag(
    chord_matrix,
    chordedgeColor = NA,
    groupThickness = .05,
    groupnamePadding = 5,
    showTicks = FALSE,
    margin = 150
)

##--------------
## Network Graph
##--------------
## Build a weighted graph from the adjacency matrix with its diagonal zeroed;
## the original tag order is kept (order = FALSE).
## graph_from_adjacency_matrix() is the current igraph name for
## graph.adjacency().
graph <- igraph::graph_from_adjacency_matrix(
    remove_diags(x[["adjacency"]], order = FALSE),
    weighted = TRUE
)

## Edge list as a data.frame, with its three columns renamed to
## source / target / value for the `links` argument below.
## as_data_frame() is the current igraph name for get.data.frame().
linkdf <- stats::setNames(
    igraph::as_data_frame(graph),
    c("source", "target", "value")
)

qrage::qrage(
    links = linkdf,
    nodeValue = textshape::tidy_vector(x[["node_size"]]),
    cut = 0.1
)

## Example 2
## Same workflow as Example 1, but frequent_terms() is called with n=50
## (presumably requesting more terms than the default -- confirm against the
## frequent_terms documentation), giving a denser co-occurrence structure.
regs2 <- as_term_list(frequent_terms(presidential_debates_2012[["dialogue"]], n=50)[[1]])

model2 <- with(presidential_debates_2012,
    term_count(dialogue, TRUE, regs2)
)

x2 <- tag_co_occurrence(model2)

## Default plot, then variations on the edge cutoff, bar display, and colors
plot(x2)
plot(x2, bar = FALSE, min.edge.cutoff = .13)
plot(x2, bar = FALSE, min.edge.cutoff = .18, node.color = "#ead453")

## Variations on node and edge size ranges
plot(x2, node.size.range = c(.1, 15))
plot(x2, edge.width.range = c(.1, 15), node.size.range = c(.1, 15))

## Light-on-dark color scheme
plot(x2, edge.color = "gray99", node.color = "grey75", font.color = "white",
    background.color = "black")

## Small Number of Tags Example
## NOTE(review): `markers` is not created in this example; presumably it is
## an object shipped with the package -- confirm.
plot(tag_co_occurrence(markers), node.size.range = 5, min.edge.cutoff = .08)

## End(Not run)

trinker/termco documentation built on Jan. 7, 2022, 3:32 a.m.