# Nothing
## ---- echo = FALSE, message = FALSE-------------------------------------------
# Global knitr chunk options: collapse source and output into one block,
# prefix printed output with "#>", and use a default figure width of 7 and
# height of 5. `TRUE` is spelled out because `T` is an ordinary variable that
# can be reassigned.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 7,
  fig.height = 5
)
# Keep printed tibbles short (print_min / print_max of 4 rows).
options(tibble.print_min = 4L, tibble.print_max = 4L)
## ---- message = FALSE---------------------------------------------------------
library(jsonlite)
library(dplyr)
library(purrr)
library(magrittr)
library(forcats)
library(ggplot2)
library(igraph)
library(RColorBrewer)
library(wordcloud)
library(viridis)
library(listviewer)
library(tidyjson)
# Fix the RNG seed so the sample() calls further down are reproducible.
set.seed(1)
## -----------------------------------------------------------------------------
# Complexity of every document in `companies`.
co_length <- companies %>% json_complexity()
## -----------------------------------------------------------------------------
# Density of document complexity on a log10 x axis.
co_length %>%
  ggplot(aes(complexity)) +
  geom_density() +
  scale_x_log10() +
  # `sides` is written out in full instead of relying on partial argument
  # matching of `side`.
  annotation_logticks(sides = "b")
## -----------------------------------------------------------------------------
# Position of the first document whose complexity is exactly 20; that document
# is kept as a small worked example and printed.
co_examp_index <- match(20L, co_length$complexity)
co_examp <- companies[co_examp_index]
co_examp
## -----------------------------------------------------------------------------
# Show the example document with jsonedit in "code" mode.
jsonedit(co_examp, mode = "code")
## -----------------------------------------------------------------------------
# Structure of five randomly sampled documents.
co_struct <- json_structure(sample(companies, 5))
print(co_struct)
## -----------------------------------------------------------------------------
# For every (level, name, type) combination in the sampled structure, count the
# number of distinct documents it occurs in. Nodes of type "null" and nodes
# without a name are excluded.
co_names <- co_struct %>%
  filter(type != "null", !is.na(name)) %>%
  group_by(level, name, type) %>%
  summarise(ndoc = n_distinct(document.id))
co_names
## -----------------------------------------------------------------------------
# Word cloud of the names, weighted by document count.
# NOTE(review): co_struct is built from five sampled documents, so ndoc cannot
# exceed 5 and min.freq = 100 is above every frequency -- confirm that this
# threshold is still the intended one.
with(co_names, wordcloud(name, ndoc, scale = c(1.5, .1), min.freq = 100))
## ---- fig.height = 9----------------------------------------------------------
# Rank the names within each type (highest document count first, ties broken by
# level) and draw them as text, one facet per type, coloured by document count.
co_names %>%
  ungroup() %>%
  group_by(type) %>%
  arrange(desc(ndoc), level) %>%
  # row_number() yields the same within-group ranks as 1:n() without the
  # 1:0 pitfall of the colon operator.
  mutate(rank = row_number()) %>%
  ggplot(aes(1, rank)) +
  geom_text(aes(label = name, color = ndoc)) +
  # Rank 1 is drawn at the top of each facet.
  scale_y_reverse() +
  facet_grid(. ~ type) +
  theme_void() +
  theme(legend.position = "bottom") +
  scale_color_viridis(direction = -1)
## -----------------------------------------------------------------------------
# Plots an igraph visualization of a JSON document
#
# The document is expanded with json_structure(); every node becomes a vertex
# colored by its JSON type, and every parent/child pair becomes an edge.
#
# @param .x a JSON string or tbl_json object; must hold exactly one document,
#   otherwise an error is raised
# @param legend add a type color legend automatically
# @param vertex.size the size of the vertices
# @param edge.color the color for the edges
# @param edge.width the width of the edge lines
# @param show.labels should object names be shown
# @param plot should the plot be rendered?
# @param ... further arguments to igraph::plot.igraph
# @return the igraph graph object, invisibly
plot_json_graph <- function(.x, legend = TRUE, vertex.size = 6,
                            edge.color = 'grey70', edge.width = .5,
                            show.labels = TRUE, plot = TRUE,
                            ...) {

  if (!is.tbl_json(.x)) .x <- as.tbl_json(.x)
  if (nrow(.x) != 1) stop("nrow(.x) not equal to 1")

  # One row per node of the document.
  json_nodes <- .x %>% json_structure

  # Palette indexed by the integer code of the `type` factor.
  # NOTE(review): assumes `type` has at most six levels -- confirm.
  type_colors <- RColorBrewer::brewer.pal(6, "Accent")

  # Rows without a parent have no incoming edge and are dropped here.
  graph_edges <- json_nodes %>%
    filter(!is.na(parent.id)) %>%
    select(parent.id, child.id)

  graph_vertices <- json_nodes %>%
    transmute(child.id,
              vertex.color = type_colors[as.integer(type)],
              vertex.label = name)

  if (!show.labels) {
    graph_vertices$vertex.label <- rep(NA_character_, nrow(graph_vertices))
  }

  g <- igraph::graph_from_data_frame(graph_edges, vertices = graph_vertices,
                                     directed = FALSE)

  if (plot) {
    # Remove the margins for the drawing and restore the previous setting when
    # the function exits, including when plotting fails part-way through.
    op <- par(mar = c(0, 0, 0, 0))
    on.exit(par(op), add = TRUE)

    igraph::plot.igraph(g,
                        vertex.color = igraph::V(g)$vertex.color,
                        vertex.size = vertex.size,
                        vertex.label = igraph::V(g)$vertex.label,
                        vertex.frame.color = NA,
                        layout = igraph::layout_with_kk,
                        edge.color = edge.color,
                        edge.width = edge.width,
                        ...)

    if (legend) {
      # Namespace-qualified because the logical `legend` argument has the same
      # name as the graphics function.
      graphics::legend(x = -1.3, y = -.6,
                       legend = levels(json_nodes$type), pch = 21,
                       col = "white", pt.bg = type_colors,
                       pt.cex = 2, cex = .8, bty = "n", ncol = 1)
    }
  }

  invisible(g)
}
## -----------------------------------------------------------------------------
# A small document with one value of each JSON type, drawn with the default
# settings (legend and labels on).
plot_json_graph(
'{"object" : {"name": 1},
"array" : ["a", "b"],
"string" : "value",
"number" : 1,
"logical": true,
"null" : null}'
)
## -----------------------------------------------------------------------------
# The example company document selected earlier.
plot_json_graph(co_examp)
## ---- fig.height = 8----------------------------------------------------------
# Plots a grid of JSON graphs, one panel per element of `json`
#
# Elements beyond the nrow * ncol cells of the grid are not drawn. When `json`
# has names, each panel is titled with the name of its element.
#
# @param json a vector or list of JSON documents; each element is passed to
#   plot_json_graph
# @param nrow number of rows in the panel grid
# @param ncol number of columns in the panel grid
# @param ... further arguments to plot_json_graph
# @return NULL, invisibly
plot_json_graph_panel <- function(json, nrow, ncol, ...) {

  # Set up grid; the previous layout is restored when the function exits, even
  # if drawing one of the panels fails.
  op <- par(mfrow = c(nrow, ncol))
  on.exit(par(op), add = TRUE)

  # Draw at most as many documents as the grid has cells.
  indices <- seq_len(min(length(json), nrow * ncol))
  panel_names <- names(json)

  for (i in indices) {
    plot_json_graph(json[[i]], ...)
    if (!is.null(panel_names)) {
      title(panel_names[i], col.main = 'red')
    }
  }

  invisible(NULL)
}
## ---- fig.height = 8----------------------------------------------------------
# Five randomly sampled companies drawn as small unlabeled graphs in a 7 x 6
# grid.
plot_json_graph_panel(sample(companies, 5), nrow = 7, ncol = 6,
                      legend = FALSE, show.labels = FALSE, vertex.size = 4)
## -----------------------------------------------------------------------------
# The single most complex document. which.max() always returns one position
# (the first maximum), so a tie in complexity cannot produce several documents,
# which plot_json_graph() would reject.
most_complex <- companies[which.max(co_length$complexity)]
# Value of the "name" key of that document.
most_complex_name <- most_complex %>%
  spread_values(name = jstring(name)) %>%
  extract2("name")
## -----------------------------------------------------------------------------
# Graph of the most complex document, with small vertices and no labels.
plot_json_graph(most_complex, show.labels = FALSE, vertex.size = 2)
## -----------------------------------------------------------------------------
# Schema of that document, shown with jsonedit in "code" mode.
jsonedit(json_schema(most_complex), mode = "code")
## -----------------------------------------------------------------------------
# Graph of the schema itself (type = "value").
plot_json_graph(json_schema(most_complex, type = "value"))
## -----------------------------------------------------------------------------
# One schema graph per top-level key whose value is an array or object with a
# complexity of at least 15, arranged in a 3 x 3 panel.
most_complex %>%
  gather_object() %>%
  json_types() %>%
  json_complexity() %>%
  filter(type %in% c('array', 'object'), complexity >= 15) %>%
  split(.$name) %>%
  map(~ json_schema(.x, type = "value")) %>%
  plot_json_graph_panel(nrow = 3, ncol = 3, legend = FALSE)
## -----------------------------------------------------------------------------
# One row per element of each company's `funding_rounds` array, with the round
# code, currency code and raised amount pulled out as columns.
rounds <- spread_values(
  gather_array(enter_object(companies, funding_rounds)),
  round = jstring(round_code),
  currency = jstring(raised_currency_code),
  raised = jnumber(raised_amount)
)
head(rounds)
## -----------------------------------------------------------------------------
# One row per element of each company's `offices` array, with country code,
# state code and description pulled out as columns.
geos <- spread_values(
  gather_array(enter_object(companies, offices)),
  country = jstring(country_code),
  state = jstring(state_code),
  description = jstring(description)
)
head(geos)
## -----------------------------------------------------------------------------
# First listed office of each company, restricted to offices in the USA.
hqs <- geos %>%
  filter(array.index == 1, country == "USA") %>%
  select(document.id, state)
# Funding rounds denominated in USD that have a recorded amount.
rounds_usd <- rounds %>%
  filter(currency == "USD", !is.na(raised)) %>%
  select(document.id, round, raised)
# Attach the state of the first office to every USD round of the same document.
rounds_by_geo <- as_tibble(inner_join(rounds_usd, hqs, by = "document.id"))
## -----------------------------------------------------------------------------
# Plot-ready data: rows without a state are dropped, round codes are collapsed
# into a handful of ordered buckets, and states are lumped with fct_lump(2).
round_prep <- mutate(
  dplyr::filter(rounds_by_geo, !is.na(state)),
  round = forcats::fct_relevel(
    forcats::fct_collapse(
      round,
      "angel" = c("seed", "angel"),
      "d-f" = c("d", "e", "f"),
      "other" = c("grant", "partial", "post_ipo_equity", "private_equity",
                  "debt_round", "unattributed")
    ),
    "angel", "a", "b", "c", "d-f", "other"
  ),
  state = forcats::fct_lump(state, 2)
)
# Violin plot of the amount raised per state bucket, one facet per funding
# round, with the amount on a log10 y axis.
g <- ggplot(round_prep, aes(state, raised, fill = state)) +
  geom_violin() +
  scale_y_log10() +
  # `sides` is written out in full instead of relying on partial argument
  # matching of `side`.
  annotation_logticks(sides = "l") +
  facet_grid(. ~ round) +
  theme(legend.position = "bottom") +
  labs(x = "", y = "Amount Raised (USD)")
g
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.