Install the package from GitHub, e.g.,
## One-time setup: install the mixpanel package from its GitHub repository
## ("mtmorgan/mixpanel") via BiocManager.
BiocManager::install("mtmorgan/mixpanel")
Load the package and dplyr
## Attach the packages used throughout this document.
library(mixpanel)
library(dplyr)
library(RColorBrewer)
library(ggplot2)

## Turn off the informational message summarise() prints about how its
## result is regrouped.
options(dplyr.summarise.inform = FALSE)
Use retrieve()
to configure Google Drive access. Once authorized,
retrieve()
finds and localizes all .csv
files in the MixPanel
Google Drive folder. Files already cached are not downloaded a second
time.
## Find and localize the .csv files in the MixPanel Google Drive folder.
## `tbl` describes the files found; the code below filters it on its
## `title` (and, for the retention report, `month`) columns.
tbl <- retrieve()
Use read()
to concatenate several CSV files into a single
tibble. Usually the CSV files summarize the same facet but in
different months.
Use hash()
to obscure (not cryptographically secure or recoverable
in a separate R session) potentially sensitive information such as
workspace or workflow names. unhash()
can be used to recover
original values, but only in the same session as the hash()
call
that generated the keys.
The following file titles are available for summary.
## One row per distinct file title available in `tbl`.
tbl |>
  distinct(title)
Application (Galaxy, RStudio, Notebook) launches, per month.
## Read the "Application Launches" files and total DateRangeCount for each
## Event / Month combination.
app_launches <-
  tbl |>
  filter(title == "Application Launches") |>
  read() |>
  group_by(Event, Month) |>
  summarise(n = sum(DateRangeCount))

app_launches
Application unique users, per month.
## Read the "Application Users" files and total DateRangeCount for each
## Event / Month combination.
app_users <-
  tbl |>
  filter(title == "Application Users") |>
  read() |>
  group_by(Event, Month) |>
  summarise(n = sum(DateRangeCount))

app_users
Workspace use, cumulative.
## Total clone counts per source workspace, keeping workspaces with more
## than one clone and listing the most-cloned first. Workspace names are
## obscured with hash(); NOTE(review): `except = "^Bioconductor"` presumably
## leaves names starting with "Bioconductor" readable -- confirm against
## hash()'s documentation.
tbl |>
  filter(title == "Cloned Workspaces") |>
  read() |>
  mutate(
    fromWorkspaceName = hash(fromWorkspaceName, except = "^Bioconductor")
  ) |>
  group_by(fromWorkspaceName) |>
  summarise(n = sum(DateRangeCount)) |>
  filter(n > 1L) |>
  arrange(desc(n))
Workflow import, launch, and re-run, per month.
## Read the "Workflow Actions" files and total DateRangeCount for each
## Event / Month combination.
workflow_events <-
  tbl |>
  filter(title == "Workflow Actions") |>
  read() |>
  group_by(Event, Month) |>
  summarise(n = sum(DateRangeCount))

workflow_events
Workflow use, cumulative.
## Total launch counts per workflow (methodPath obscured with hash()),
## most-launched first.
tbl |>
  filter(title == "What workflows have been launched") |>
  read() |>
  mutate(methodPath = hash(methodPath)) |>
  group_by(methodPath) |>
  summarise(n = sum(DateRangeCount)) |>
  arrange(desc(n))
Workflow source (e.g., dockstore).
## Total DateRangeCount for each import source / Month combination.
tbl |>
  filter(title == "Workflow Import Source") |>
  read() |>
  group_by(source, Month) |>
  summarise(n = sum(DateRangeCount))
## Reduce an event label to the application name it contains.
##
## @param event Character vector of event labels.
## @return Character vector the same length as `event`; a label containing
##   "Galaxy", "RStudio", or "Notebook" is replaced by that word, any other
##   label is returned unchanged.
app_clean <- function(event) {
  event <- sub(".*(Galaxy|RStudio).*", "\\1", event)
  sub(".*(Notebook).*", "\\1", event)
}

## Launches and unique users per application and month, side by side.
##
## The monthly summaries come from group_by(Event, Month) |> summarize(),
## which drops only the last grouping level and so leaves them grouped by
## Event. mutate() refuses to modify a grouping column, and the grouping
## would also carry through left_join() into the factor() step below, so
## each input is ungrouped before Event is re-labelled. ungroup() is a
## no-op on data that is already ungrouped.
app_launches_users <- left_join(
  app_launches |>
    ungroup() |>
    mutate(Event = app_clean(Event)) |>
    rename(Launches = n),
  app_users |>
    ungroup() |>
    mutate(Event = app_clean(Event)) |>
    rename(Users = n),
  by = c("Event", "Month")
) |>
  ## Fix the application order used for display.
  mutate(Event = factor(Event, levels = c("Galaxy", "RStudio", "Notebook")))

## Application rows plus the workflow "Launch" events, the latter relabelled
## "Workflow". The workflow rows have no Users column, so bind_rows() fills
## it with NA for them.
all_launches_users <- bind_rows(
  app_launches_users,
  workflow_events |>
    ungroup() |>
    filter(grepl("Launch", Event)) |>
    mutate(Event = sub(".*(Workflow).*", "\\1", Event)) |>
    rename(Launches = n)
)
## Stacked bars of unique users per month, one fill colour per application.
## geom_col() draws bars whose heights are the data values, i.e. the same
## as geom_bar(stat = "identity").
ggplot(
  app_launches_users,
  aes(x = Month, y = Users, group = Event, fill = Event)
) +
  geom_col(position = "stack", width = .5) +
  ggtitle("Application users (unique)")
## Collapse every non-"Workflow" event into a single "Application" category,
## then total launches and users for each category and month.
app_workflow_launches_users <-
  all_launches_users |>
  mutate(Event = ifelse(Event == "Workflow", Event, "Application")) |>
  group_by(Event, Month) |>
  summarise(Launches = sum(Launches), Users = sum(Users))

## Stacked bars of launches per month, workflows versus applications.
ggplot(
  app_workflow_launches_users,
  aes(x = Month, y = Launches, group = Event, fill = Event)
) +
  geom_col(position = "stack", width = .5) +
  ggtitle("Workflow and application launches")
The retention_report
summarizes how many users from one particular
day (or the monthly $average
) are seen again in 1, 2, ... months.
## Display the full retention report.
tbl |>
  filter(title == "retention_report") |>
  read(is_retention_report = TRUE)

## Read only the '$average' row of each monthly file (n_max = 1), skipping
## the file whose `month` is "Aug", and reshape to one row per
## Month / Retained pair.
monthly_retention <-
  tbl |>
  filter(title == "retention_report", month != "Aug") |>
  read(n_max = 1) |>
  select(-Date) |>
  tidyr::pivot_longer(-Month, names_to = "Retained", values_to = "count") |>
  mutate(
    ## Keep the retention columns in their original order on the x axis.
    Retained = factor(Retained, levels = unique(Retained)),
    ## Reverse the month levels for the y axis.
    Month = factor(Month, levels = rev(levels(Month)))
  )

## Heat map of retained-user counts; direction = 1 reverses the palette's
## default order.
ggplot(monthly_retention, aes(Retained, Month, fill = count)) +
  geom_raster() +
  scale_fill_distiller(palette = "Blues", direction = 1)
Bioconductor PopUp workshops were held on the following dates:
## Seven weekly dates starting 2021-04-26, with the sixth one removed.
dates <- (as.Date("2021-04-26") + (0:6) * 7)[-6]

## Retention rows for those dates only, reshaped to one row per
## Date / Retained pair.
workshop_retention <-
  tbl |>
  filter(title == "retention_report") |>
  read(is_retention_report = TRUE) |>
  filter(Date %in% dates) |>
  select(-Month, -"$date") |>
  tidyr::pivot_longer(-Date, names_to = "Retained", values_to = "count") |>
  mutate(
    ## Keep the retention columns in their original order on the x axis.
    Retained = factor(Retained, levels = unique(Retained)),
    ## Treat dates as discrete labels, in reverse order of appearance.
    Date = factor(
      as.character(Date),
      levels = rev(unique(as.character(Date)))
    )
  )

## Heat map of retained-user counts per workshop date.
ggplot(workshop_retention, aes(Retained, Date, fill = count)) +
  geom_raster() +
  scale_fill_distiller(palette = "Blues", direction = 1)
## Print the R version and the attached / loaded packages, for
## reproducibility.
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.