Installation

Install the package from GitHub, e.g.,

BiocManager::install("mtmorgan/mixpanel")

Load the package, along with dplyr, RColorBrewer, and ggplot2

library(mixpanel)
library(dplyr)
library(RColorBrewer)
library(ggplot2)
options(dplyr.summarise.inform = FALSE)

Configuration and localization

Use retrieve() to configure Google Drive access. Once authorized, retrieve() finds and localizes all .csv files in the MixPanel Google Drive folder. Files already cached are not downloaded a second time.

## Authorize Google Drive access (if needed) and localize the MixPanel CSV
## files; `tbl` is the starting point for every summary in this document.
tbl <- retrieve()

Use

Use read() to concatenate several CSV files into a single tibble. Usually the CSV files summarize the same facet but in different months.

Use hash() to obscure potentially sensitive information such as workspace or workflow names. The hash is not cryptographically secure, and hashed values cannot be recovered in a separate R session. unhash() can be used to recover the original values, but only in the same session as the hash() call that generated the keys.

The following file titles are available for summary.

## One row per distinct file title found in `tbl`.
distinct(tbl, title)

Summary uses

Application (Galaxy, RStudio, Notebook) launches, per month.

## Sum DateRangeCount for every Event x Month combination in the
## "Application Launches" files.
##
## `.groups = "drop"` returns an ungrouped tibble. By default summarize()
## only peels off the last grouping variable, so the result would stay grouped
## by Event and later mutate() / join steps on `app_launches` would silently
## run per group.
app_launches <-
    filter(tbl, title == "Application Launches") |>
    read() |>
    group_by(Event, Month) |>
    summarize(n = sum(DateRangeCount), .groups = "drop")
app_launches

Application unique users, per month.

## Sum DateRangeCount for every Event x Month combination in the
## "Application Users" files.
##
## `.groups = "drop"` returns an ungrouped tibble. By default summarize()
## only peels off the last grouping variable, so the result would stay grouped
## by Event and later mutate() / join steps on `app_users` would silently run
## per group.
app_users <-
    filter(tbl, title == "Application Users") |>
    read() |>
    group_by(Event, Month) |>
    summarize(n = sum(DateRangeCount), .groups = "drop")
app_users

Workspace use, cumulative.

## Cumulative clone counts per source workspace, most-cloned first.
tbl |>
    filter(title == "Cloned Workspaces") |>
    read() |>
    ## Obscure workspace names before display. NOTE(review): `except` is
    ## presumably a pattern of names to leave unhashed -- confirm against hash().
    mutate(
        fromWorkspaceName = hash(fromWorkspaceName, except = "^Bioconductor")
    ) |>
    group_by(fromWorkspaceName) |>
    summarize(n = sum(DateRangeCount)) |>
    ## keep only workspaces with a total greater than one
    filter(n > 1L) |>
    arrange(desc(n))

Workflow import, launch, and re-run, per month.

## Sum DateRangeCount for every Event x Month combination in the
## "Workflow Actions" files.
##
## `.groups = "drop"` returns an ungrouped tibble. By default summarize()
## only peels off the last grouping variable, so the result would stay grouped
## by Event and later filter() / mutate() steps on `workflow_events` would
## silently run per group.
workflow_events <-
    filter(tbl, title == "Workflow Actions") |>
    read() |>
    group_by(Event, Month) |>
    summarize(n = sum(DateRangeCount), .groups = "drop")
workflow_events

Workflow use, cumulative.

## Cumulative DateRangeCount per workflow (methodPath), largest first.
tbl |>
    filter(title == "What workflows have been launched") |>
    read() |>
    ## obscure the workflow path before it is displayed
    mutate(methodPath = hash(methodPath)) |>
    group_by(methodPath) |>
    summarize(n = sum(DateRangeCount)) |>
    arrange(desc(n))

Workflow source (e.g., dockstore).

## Sum DateRangeCount for every import source x Month combination.
tbl |>
    filter(title == "Workflow Import Source") |>
    read() |>
    group_by(source, Month) |>
    summarize(n = sum(DateRangeCount))

Application and workflow summary

## Reduce an event label to the bare application name it mentions.
##
## `event` is a vector of event labels. Labels containing "Galaxy" or
## "RStudio" are collapsed to that word first; any label still containing
## "Notebook" is then collapsed to "Notebook". Labels matching neither pattern
## are returned unchanged. The result is a character vector the same length
## as `event`.
app_clean <- function(event) {
    ## applied in order, so Galaxy / RStudio take precedence over Notebook
    patterns <- c(".*(Galaxy|RStudio).*", ".*(Notebook).*")
    for (pattern in patterns) {
        event <- sub(pattern, "\\1", event)
    }
    event
}

## One row per application x Month with both the launch count (Launches) and
## the unique-user count (Users). Event labels are normalized with app_clean()
## on both sides so the two tables join on the bare application name.
app_launches_users <-
    app_launches |>
    mutate(Event = app_clean(Event)) |>
    rename(Launches = n) |>
    left_join(
        app_users |>
            mutate(Event = app_clean(Event)) |>
            rename(Users = n),
        by = c("Event", "Month")
    ) |>
    ## fix the level order of the applications
    mutate(Event = factor(Event, levels = c("Galaxy", "RStudio", "Notebook")))

## Stack the application rows on top of the workflow launch rows. The workflow
## rows have no Users column, so bind_rows() fills Users with NA for them.
all_launches_users <- bind_rows(
    app_launches_users,
    workflow_events |>
        rename(Launches = n) |>
        ## keep only the launch events, then relabel them as "Workflow"
        filter(grepl("Launch", Event)) |>
        mutate(Event = sub(".*(Workflow).*", "\\1", Event))
)
## Stacked bars of unique users per month, one fill colour per application.
## geom_col() draws bar heights directly from the y values.
ggplot(
    data = app_launches_users,
    mapping = aes(x = Month, y = Users, fill = Event, group = Event)
) +
    geom_col(position = "stack", width = .5) +
    ggtitle("Application users (unique)")
## Collapse the individual applications into a single "Application" category
## and total Launches and Users per category and month.
app_workflow_launches_users <-
    all_launches_users |>
    ## Use the literal "Workflow" as the 'yes' value: ifelse() drops factor
    ## attributes, so returning `Event` itself would yield integer codes if
    ## Event were ever a factor.
    mutate(Event = ifelse(Event == "Workflow", "Workflow", "Application")) |>
    group_by(Event, Month) |>
    ## sum() propagates NA, so a group with any missing Users has an NA total.
    ## `.groups = "drop"` returns an ungrouped tibble instead of one that is
    ## still grouped by Event.
    summarize(
        Launches = sum(Launches),
        Users = sum(Users),
        .groups = "drop"
    )

## Stacked bars of launches per month, split into Workflow vs. Application.
## geom_col() draws bar heights directly from the y values.
ggplot(
    data = app_workflow_launches_users,
    mapping = aes(x = Month, y = Launches, fill = Event, group = Event)
) +
    geom_col(position = "stack", width = .5) +
    ggtitle("Workflow and application launches")

Retention

The retention_report summarizes how many users from one particular day (or the monthly $average) are seen again in 1, 2, ... months.

## Read every retention report file in retention-report mode and display it.
tbl |>
    filter(title == "retention_report") |>
    read(is_retention_report = TRUE)

## Monthly retention: read only the first row of each report (the '$average'
## row), then reshape to one row per Month x retention period.
## NOTE(review): unlike the other retention chunks, read() is called here
## without `is_retention_report = TRUE`, and the filter relies on a `month`
## column of `tbl` -- confirm both are intended.
monthly_retention <-
    tbl |>
    filter(
        title == "retention_report",
        month != "Aug"
    ) |>
    read(n_max = 1) |>
    select(-Date) |>
    tidyr::pivot_longer(-Month, names_to = "Retained", values_to = "count") |>
    ## keep the retention periods in the order they appear
    mutate(Retained = factor(Retained, levels = unique(Retained))) |>
    ## reverse the Month levels; this uses levels(), so Month is presumably
    ## already a factor when it comes out of read() -- TODO confirm
    mutate(Month = factor(Month, levels = rev(levels(Month))))

## Heat map of monthly retention: retention period on x, Month on y.
## direction = 1 uses the "Blues" palette in its native order, so larger
## counts are drawn darker.
ggplot(
    data = monthly_retention,
    mapping = aes(x = Retained, y = Month, fill = count)
) +
    geom_raster() +
    scale_fill_distiller(palette = "Blues", direction = 1)

Bioconductor PopUp workshops were held on the following dates:

## Workshop dates fall on a weekly schedule starting 2021-04-26: weeks 0-4 and
## week 6 (week 5 is skipped).
dates <- as.Date("2021-04-26") + 7 * c(0:4, 6)

## Retention for the cohorts first seen on a workshop date, reshaped to one
## row per Date x retention period.
workshop_retention <-
    tbl |>
    filter(title == "retention_report") |>
    read(is_retention_report = TRUE) |>
    filter(Date %in% dates) |>
    select(-Month, -"$date") |>
    tidyr::pivot_longer(-Date, names_to = "Retained", values_to = "count") |>
    mutate(
        ## keep the retention periods in the order they appear
        Retained = factor(Retained, levels = unique(Retained)),
        ## treat dates as discrete labels, levels in reverse order of appearance
        Date = as.character(Date),
        Date = factor(Date, levels = rev(unique(Date)))
    )

## Heat map of workshop-cohort retention: retention period on x, date on y.
## direction = 1 uses the "Blues" palette in its native order, so larger
## counts are drawn darker.
ggplot(
    data = workshop_retention,
    mapping = aes(x = Retained, y = Date, fill = count)
) +
    geom_raster() +
    scale_fill_distiller(palette = "Blues", direction = 1)

Session info {.unnumbered}

## Record the R version and the packages in use when this document was run.
sessionInfo()


mtmorgan/mixpanel documentation built on Dec. 21, 2021, 11 p.m.