library(dplyr)
library(ggplot2)
library(ggvis)
library(viridis)

# Load the package under development from the current directory so that its
# helper functions are available to the rest of the script.
devtools::load_all(".")

# Install-invocation data, followed by a quick column overview.
installs <- fetch_install_invocations()
summary(installs)

# Per-package popularity table derived from the install data.
popularity <- popularity_by_package(installs)
summary(popularity)

# Build-error data, followed by a quick column overview.
build_errors <- fetch_build_errors()
summary(build_errors)
# Total build failures per package. The `eventAction` column of the
# build-error data is used as the package name and `totalEvents` is summed
# within each package.
error_frequency <- build_errors %>%
  select(package = eventAction, totalEvents) %>%
  group_by(package) %>%
  summarize(failures = sum(totalEvents))

# Attach the failure totals to the popularity table; packages without any
# recorded build error end up with failures = NA. The join key is spelled out
# so it cannot silently change if either table gains another shared column.
popularity <- left_join(popularity, error_frequency, by = "package")
# Column overview of the joined table. This is summary(), matching the
# overviews printed for the other tables; dplyr's summarize() without any
# expressions would only produce an empty result.
summary(popularity)
# Interactive scatter plot of failures against n (log-scaled x axis) for the
# packages that have a failure count. The tooltip shows the package name of
# the point it is attached to.
popularity %>%
  filter(!is.na(failures)) %>%
  ggvis(x = ~n, y = ~failures, key := ~package) %>%
  layer_points() %>%
  scale_numeric("x", trans = "log", expand = 0) %>%
  add_tooltip(function(hovered) paste(hovered$package))
# Static scatter of failures against n with a log10 x axis: every package that
# has a failure count is drawn as its own name instead of a point.
popularity %>%
  filter(!is.na(failures)) %>%
  ggplot(aes(x = n, y = failures, label = package)) +
  geom_text() +
  scale_x_log10()
# Horizontal bar chart of the 100 highest failure rates among packages with
# n > 500. Bar length is failures / n as a percentage; bar colour is log2(n).
g <- popularity %>%
  filter(n > 500) %>%
  mutate(failed_fraction = failures / n) %>%
  arrange(desc(failed_fraction)) %>%
  head(100) %>%
  # Use the reversed row order as factor levels so that, once the axes are
  # flipped, the first (highest) row is drawn at the top.
  mutate(package = factor(package, levels = rev(package))) %>%
  ggplot(aes(x = package, y = failed_fraction * 100, fill = log2(n))) +
  geom_bar(stat = "identity") +
  scale_fill_viridis("Installs") +
  labs(y = "% of installs that fail") +
  coord_flip()
print(g)
# Build errors for xz and python, broken down by OS version (bar fill) and by
# whether the event label mentions a "non-/usr/local" prefix (facet columns).
build_errors %>%
  rename(package = eventAction) %>%
  filter(package %in% c("xz", "python")) %>%
  mutate(
    # Version string derived from the event label via the package helpers.
    os_version = major_minor(normalize_macos_version(eventLabel)),
    default_prefix = factor(
      grepl("non-/usr/local", eventLabel),
      levels = c(FALSE, TRUE),
      labels = c("Default", "Non-default")
    )
  ) %>%
  # Keep only rows whose version compares as 10.6 or later.
  filter(sapply(os_version, compareVersion, "10.6") >= 0) %>%
  ggplot(aes(x = package, y = totalEvents, fill = os_version)) +
  facet_grid(package ~ default_prefix, scales = "free") +
  geom_bar(stat = "identity", width = 0.5) +
  coord_flip()
# Install counts per date and package. The package is taken to be the first
# space-separated token of `eventAction`; `totalEvents` is used as the weight,
# so the resulting count column `n` is the summed event total.
date_popularity <- (function(df) {
  invocations <- stringr::str_split(df$eventAction, " ")
  # vapply() rather than sapply(): the result is always a character vector,
  # including when `df` has zero rows (sapply() would return an empty list).
  df$package <- vapply(invocations, function(tokens) tokens[[1]], character(1))
  df %>% count(date, package, wt = totalEvents)
})(installs)

# Add the per-date, per-package build-failure totals. Date/package pairs with
# no recorded build error get failures = NA. The join keys are explicit so the
# join does not depend on whichever columns the two tables happen to share.
date_popularity <- left_join(
  date_popularity,
  build_errors %>%
    select(date, package = eventAction, totalEvents) %>%
    group_by(date, package) %>%
    summarize(failures = sum(totalEvents)),
  by = c("date", "package")
)
# Packages to show in the time-series plot.
to_plot <- c("xz", "python", "go")

# One row per (package, date) returned by commits_since() for each package in
# `to_plot`; packages for which it returns nothing contribute no rows.
commits <- lapply(to_plot, function(pkg) {
  # Look the dates up once and reuse them, instead of calling
  # commits_since() a second time when building the data frame.
  dates <- commits_since(pkg)
  if (length(dates) == 0) {
    data.frame()
  } else {
    # Keep `package` as character regardless of R version so that binding the
    # per-package frames does not have to reconcile differing factor levels.
    data.frame(package = pkg, date = dates, stringsAsFactors = FALSE)
  }
}) %>%
  dplyr::bind_rows()

# Failure rate (failures / n) over time for the packages in `to_plot`, one
# facet row per package, with the y axis limited to [0, 0.1]. Vertical lines
# are drawn at the dates listed in `commits`.
date_popularity %>%
  filter(package %in% to_plot) %>%
  ggplot(aes(date, failures / n, color = package)) +
  facet_grid(package ~ .) +
  ylim(0, 0.1) +
  geom_point() +
  geom_line(alpha = 0.3) +
  # `commits` has no columns at all when none of the packages had any commit
  # dates, in which case the `date` mapping cannot be evaluated. Only add the
  # layer when there is something to draw; adding NULL leaves the plot as is.
  (if (nrow(commits) > 0) {
    geom_vline(mapping = aes(xintercept = as.numeric(date)), data = commits)
  })


# tdsmith/HomebrewGA documentation built on May 31, 2019, 7:56 a.m.