plot_upset: Plot 'term_count' Object as Upset Plot
In trinker/termco: Counts of Terms and Substrings

Description Usage Arguments Value Note References See Also Examples

Enables exploration of overlapping term categories, which is useful for tasks such as improving discrimination (see also tag_co_occurrence). The upset plot is designed to allow for exploration of overlapping sets where Euler/Venn plots fail to scale. This function wraps UpSetR's upset function to allow for exploration of the degree to which categories from a term_count object overlap. This may help to collapse codes or to see how constructs are combined within the same text. The upset plot method is complex and requires careful study in order to lead to meaningful interpretations, but the time invested can pay dividends in scalable insights.

1	plot_upset(x, text_funs = NULL, ...)

`x`	A `term_count` object.
`text_funs`	Additional list of named functions (names will be used for naming the variables created) for creating additional columns in the data set that can be used to add attribute plots to the `upset` output. These columns are added to the data set but must then be referenced via ... using `upset` syntax. Note that `n.tags` & `n.words` are computed automatically without the need to pass a function in directly here.
`...`	Other arguments passed to `upset`.

Returns an upset plot.

Because upset has many arguments, termco has opted to use ... to pass the arguments to plot_upset, as it makes plot_upset easier to maintain as upset makes changes to its API. This means the plot_upset documentation alone isn't that useful for understanding how the function operates. Use ?UpSetR::upset for a full list of the parameters that can be passed to termco::plot_upset. For example, sets enables more/less terms to be viewed, order.by specifies how the intersections between categories are arranged (default is number of tags), and nintersects homes in on how many intersects (top bar plot) can be viewed at one time. The default is 25. mb.ratio controls the spacing given to the top and lower pane (2 element numeric vector). By default plot_upset attempts to auto scale this based on the number of tags being displayed.

Conway, J. R., Lex, A., & Gehlenborg, N. (2017). UpSetR: An R package for the visualization of intersecting sets and their properties. doi:10.1093/bioinformatics/btx364

http://caleydo.org/tools/upset

upset tag_co_occurrence

## Attach the packages the examples rely on. `library()` stops with an error
## when a package is missing, whereas `require()` only returns FALSE and lets
## the script fail later with a less helpful message.
library(dplyr)
library(UpSetR)

## Named term list; the names (`if`, ans, or, buts) are the categories
## referred to by the `queries` in the examples further down.
term_list <- list(
    `if` = c("if"),
    ans = c("an"),
    or = c("or"),
    buts = c("but")
)

## Count the listed terms in the `dialogue` column of the debate data
out <- presidential_debates_2012 %>%
    with(term_count(dialogue, TRUE, term_list))

## Default upset plot of the resulting term_count object
plot_upset(out)

## Not run: 
## Change how the intersections are ordered
plot_upset(
    out,
    order.by = c("freq", "degree")
)
plot_upset(
    out,
    order.by = "degree"
)
plot_upset(
    out,
    order.by = "degree",
    decreasing = FALSE
)

## Adjust the space given to the top pane vs. the lower pane
plot_upset(
    out,
    mb.ratio = c(0.45, 0.55)
)
plot_upset(
    out,
    mb.ratio = c(0.85, 0.15)
)

## Highlight selected intersections with colored queries
plot_upset(
    out,
    queries = list(
        list(
            query = intersects,
            params = list("or"),
            color = "orange",
            active = TRUE
        ),
        list(
            query = intersects,
            params = list("if", "ans"),
            color = "#0099CC",
            active = TRUE
        )
    )
)


## Attribute plot driven by the built-in text measures (n.words, n.tags)
plot_upset(
    out,
    queries = list(
        list(
            query = intersects,
            params = list("or"),
            color = "orange",
            active = TRUE
        ),
        list(
            query = intersects,
            params = list("if", "ans"),
            color = "#0099CC",
            active = TRUE
        )
    ),
    attribute.plots = list(
        gridrows = 45,
        plots = list(
            list(plot = scatter_plot, x = "n.words", y = "n.tags", queries = TRUE)
        ),
        ncols = 1
    ),
    query.legend = "bottom"
)

## Attribute plots with a user-supplied text measure:
## `text_funs` supplies the n.chars variable used by the second scatter plot
plot_upset(
    out,
    text_funs = list(
        n.chars = function(x) {
            nchar(x)
        }
    ),
    main.bar.color = "gray60",
    sets.bar.color = "gray60",
    matrix.color = "grey60",
    queries = list(
        list(
            query = intersects,
            params = list("or"),
            color = "orange",
            active = TRUE
        ),
        list(
            query = intersects,
            params = list("if", "ans"),
            color = "#0099CC",
            active = TRUE
        )
    ),
    attribute.plots = list(
        gridrows = 45,
        plots = list(
            list(plot = scatter_plot, x = "n.words", y = "n.tags", queries = TRUE),
            list(plot = scatter_plot, x = "n.words", y = "n.chars", queries = TRUE),
            list(plot = histogram, x = "n.words", queries = TRUE)
        ),
        ncols = 3
    ),
    query.legend = "bottom"
)

## Another user-supplied text measure: average sentiment per text element,
## plotted against the tag counts and as a histogram
plot_upset(
    out,
    text_funs = list(
        sentiment = function(z) {
            sentimentr::sentiment_by(z)$ave_sentiment
        }
    ),
    queries = list(
        list(
            query = intersects,
            params = list("or"),
            color = "orange",
            active = TRUE
        ),
        list(
            query = intersects,
            params = list("if", "ans"),
            color = "#0099CC",
            active = TRUE
        ),
        list(
            query = intersects,
            params = list("buts", "ans"),
            color = "#32CD32",
            active = TRUE
        )
    ),
    attribute.plots = list(
        gridrows = 45,
        plots = list(
            list(plot = scatter_plot, y = "sentiment", x = "n.tags.unique", queries = TRUE),
            list(plot = scatter_plot, y = "sentiment", x = "n.tags", queries = TRUE),
            list(plot = histogram, x = "sentiment", queries = TRUE)
        ),
        ncols = 3
    ),
    query.legend = "bottom"
)

## Same sentiment measure, summarized with a box plot instead of
## attribute plots
plot_upset(
    out,
    text_funs = list(
        sentiment = function(z) {
            sentimentr::sentiment_by(z)$ave_sentiment
        }
    ),
    queries = list(
        list(
            query = intersects,
            params = list("or"),
            color = "orange",
            active = TRUE
        ),
        list(
            query = intersects,
            params = list("if", "ans"),
            color = "#0099CC",
            active = TRUE
        ),
        list(
            query = intersects,
            params = list("buts", "ans"),
            color = "#32CD32",
            active = TRUE
        )
    ),
    boxplot.summary = c("sentiment")
)

## Auto scaling of the plot region as the number of tags grows

## Term list built from the frequent terms of the dialogue (default number)
regs2 <- as_term_list(
    frequent_terms(presidential_debates_2012[["dialogue"]])[[1]]
)

model2 <- presidential_debates_2012 %>%
    with(term_count(dialogue, TRUE, regs2))

plot_upset(model2)

## Same again, this time passing 60 to frequent_terms
regs3 <- as_term_list(
    frequent_terms(presidential_debates_2012[["dialogue"]], 60)[[1]]
)

model3 <- presidential_debates_2012 %>%
    with(term_count(dialogue, TRUE, regs3))

plot_upset(model3)
plot_upset(
    model3,
    order.by = c("freq", "degree"),
    nintersects = 80
)

## End(Not run)