easystats

# Default chunk options applied to every code chunk of this document.
knitr::opts_chunk$set(
  fig.path = "man/figures/",
  collapse = TRUE,
  dpi = 600
)

# Session-wide printing options used while rendering.
options(
  width = 60,
  digits = 4,
  knitr.kable.NA = ""
)
# knitr::include_graphics(c("https://github.com/easystats/insight/raw/master/man/figures/logo.png",
#                           "https://github.com/easystats/bayestestR/raw/master/man/figures/logo.png",
#                           "https://github.com/easystats/parameters/raw/master/man/figures/logo.png",
#                           "https://github.com/easystats/performance/raw/master/man/figures/logo.png",
#                           "https://github.com/easystats/modelbased/raw/master/man/figures/logo.png",
#                           "https://github.com/easystats/correlation/raw/master/man/figures/logo.png",
#                           "https://github.com/easystats/report/raw/master/man/figures/logo.png"))
# 

# library(png)
# library(grid)
# library(gridExtra)
# library(RCurl)
# 
# insight <-  rasterGrob(as.raster(readPNG(getURLContent("https://github.com/easystats/insight/raw/master/man/figures/logo.png"))), interpolate = FALSE)
# bayestestR <-  rasterGrob(as.raster(readPNG(getURLContent("https://github.com/easystats/bayestestR/raw/master/man/figures/logo.png"))), interpolate = FALSE)
# grid.arrange(insight, bayestestR, ncol = 1)







The aim of easystats is to provide a unifying and consistent framework to tame, discipline and harness the scary R statistics and their pesky models.

Installation

The whole easystats suite can be installed at once with the following:

# Install devtools only when it is missing, so that an existing installation
# is not downloaded again every time these instructions are run.
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
devtools::install_github("easystats/easystats")
library("easystats")

Features

Find an overview of all postings here.

Dependencies

Most easystats packages are very lightweight, i.e., they neither rely on nor import any other packages! This means that you can safely use them as dependencies in your own packages, without the risk of butterfly effects (a small change in a distant downstream dependency with unexpected upstream consequences).

library(tidyverse)
library(devtools)
library(miniCRAN)
library(igraph)
library(ggnetwork)
library(intergraph)


# Dependency network of the core easystats packages ----

# Metadata of the package located in the working directory; its name is added
# to the graph as an extra node linked to every package listed below.
pkg <- devtools::as.package(".")

dependencies <- c(
  "insight", "performance", "bayestestR", "parameters",
  "correlation", "report", "modelbased", "effectsize"
)

dependency_graph <- miniCRAN::makeDepGraph(
  dependencies,
  suggests = FALSE,
  enhances = FALSE
)
# Overwrite the class so the object is handled as a plain igraph graph below.
class(dependency_graph) <- "igraph"

# rbind() interleaves the two vectors into (dependency, package) pairs, i.e.
# one edge between each listed dependency and the current package.
hub_edges <- as.vector(rbind(dependencies, pkg$package))
dependency_graph <- dependency_graph +
  igraph::vertices(pkg$package) +
  igraph::edges(hub_edges)
dependency_graph <- igraph::simplify(dependency_graph)

# Only referenced by the disabled line below.
edge_list <- igraph::get.edgelist(dependency_graph)
# dependency_graph <- igraph::graph(rbind(edge_list[, 2], edge_list[, 1]))  # This fails at my place

dependency_graph_df <- ggnetwork::ggnetwork(
  dependency_graph,
  layout = "fruchtermanreingold",
  arrow.gap = 0.015,
  layout.par = list(niter = 5000)
)

dependency_graph_df$package <- dependency_graph_df$vertex.names
# The current package is the only label printed in bold.
dependency_graph_df$face <- ifelse(
  dependency_graph_df$package == pkg$package, "bold", "plain"
)

# Edge names are treated as "<first>|<second>" (this is what the two patterns
# below rely on). Both counts are looked up by package name; packages that
# never appear on the relevant side of an edge come back as NA and are set to 0.
edge_names <- attr(igraph::E(dependency_graph), "vnames")
node_names <- as.character(dependency_graph_df$package)

# Number of edges in which the package is the first endpoint.
first_endpoint_counts <- table(gsub("\\|.+", "", edge_names))
dependency_graph_df$n_dependencies <- as.vector(first_endpoint_counts[node_names])
dependency_graph_df$n_dependencies[is.na(dependency_graph_df$n_dependencies)] <- 0

# Number of edges in which the package is the second endpoint.
second_endpoint_counts <- table(gsub(".+\\|", "", edge_names))
dependency_graph_df$importance <- as.vector(second_endpoint_counts[node_names])
dependency_graph_df$importance[is.na(dependency_graph_df$importance)] <- 0

max_downstream_deps <- max(dependency_graph_df$importance)

# Rescale to [0, 1] and invert, so the largest count maps to 0 and a count of
# zero maps to 1. The size legend below undoes this to display counts again.
dependency_graph_df$importance <- dependency_graph_df$importance / max_downstream_deps
dependency_graph_df$importance <- abs(1 - dependency_graph_df$importance)

# Reduce every column to a plain vector before plotting.
dependency_graph_df <- as.data.frame(lapply(dependency_graph_df, as.vector))

ggplot(dependency_graph_df, aes(x = x, y = y, xend = xend, yend = yend)) +
  geom_nodes(aes(color = n_dependencies), size = 6.5, alpha = 0.4) +
  geom_edges(
    arrow = arrow(length = unit(4, "pt"), type = "closed"),
    color = grey(0.4)
  ) +
  geom_nodelabel_repel(
    aes(label = package, fontface = face, color = n_dependencies),
    box.padding = unit(8, "pt")
  ) +
  geom_nodes(aes(color = n_dependencies, size = 7 * importance)) +
  scale_color_distiller(palette = "Spectral") +
  # Convert the plotted size (7 * inverted share) back into a count.
  scale_size(
    labels = function(x) abs(max_downstream_deps - ceiling(x / 7 * max_downstream_deps))
  ) +
  theme_blank(legend.position = "top") +
  guides(
    size = guide_legend(
      title = "Downstream dependencies",
      title.position = "top",
      title.hjust = 0.5,
      label.position = "bottom",
      label.hjust = 0.5
    ),
    color = guide_colorbar(
      title = "Upstream dependencies",
      title.position = "top",
      title.hjust = 0.5,
      barwidth = unit(130, "pt"),
      barheight = unit(4, "pt")
    )
  )

There is one exception. The see package is one of our high-level packages that is responsible for plotting and creating figures. It therefore relies on other packages such as ggplot2, which is itself plugged into the tidyverse, and this increases the number of package dependencies by a substantial amount. On the bright side of things, it gives a good overview of the place of easystats in the R ecosystem.

library(tidyverse)
library(devtools)
library(miniCRAN)
library(igraph)
library(ggnetwork)
library(intergraph)


# Dependency network of the easystats packages, including `see` ----

# Metadata of the package located in the working directory; its name is added
# to the graph as an extra node linked to every package listed below.
pkg <- devtools::as.package(".")
# dependencies <- unlist(strsplit(paste(pkg$imports, "\n", pkg$depends), split = "\n"))[-1]
# dependencies <- trimws(gsub("\\n| \\(.+\\)|,", "", dependencies))

dependencies <- c(
  "insight", "bayestestR", "performance", "parameters", "see",
  "correlation", "modelbased", "report", "effectsize"
)

dependency_graph <- miniCRAN::makeDepGraph(
  dependencies,
  suggests = FALSE,
  enhances = FALSE
)
# Overwrite the class so the object is handled as a plain igraph graph below.
class(dependency_graph) <- "igraph"

# rbind() interleaves the two vectors into (dependency, package) pairs, i.e.
# one edge between each listed dependency and the current package.
hub_edges <- as.vector(rbind(dependencies, pkg$package))
dependency_graph <- dependency_graph +
  igraph::vertices(pkg$package) +
  igraph::edges(hub_edges)
dependency_graph <- igraph::simplify(dependency_graph)

# Only referenced by the disabled line below.
edge_list <- igraph::get.edgelist(dependency_graph)
# dependency_graph <- igraph::graph(rbind(edge_list[, 2], edge_list[, 1]))

dependency_graph_df <- ggnetwork::ggnetwork(
  dependency_graph,
  layout = "fruchtermanreingold",
  arrow.gap = 0.015,
  layout.par = list(niter = 5000)
)

dependency_graph_df$package <- dependency_graph_df$vertex.names
# The current package is the only label printed in bold.
dependency_graph_df$face <- ifelse(
  dependency_graph_df$package == pkg$package, "bold", "plain"
)

# Edge names are treated as "<first>|<second>" (this is what the two patterns
# below rely on). Both counts are looked up by package name; packages that
# never appear on the relevant side of an edge come back as NA and are set to 0.
edge_names <- attr(igraph::E(dependency_graph), "vnames")
node_names <- as.character(dependency_graph_df$package)

# Number of edges in which the package is the first endpoint.
first_endpoint_counts <- table(gsub("\\|.+", "", edge_names))
dependency_graph_df$n_dependencies <- as.vector(first_endpoint_counts[node_names])
dependency_graph_df$n_dependencies[is.na(dependency_graph_df$n_dependencies)] <- 0

# Number of edges in which the package is the second endpoint.
second_endpoint_counts <- table(gsub(".+\\|", "", edge_names))
dependency_graph_df$importance <- as.vector(second_endpoint_counts[node_names])
dependency_graph_df$importance[is.na(dependency_graph_df$importance)] <- 0

max_downstream_deps <- max(dependency_graph_df$importance)

# Rescale to [0, 1] and invert, so the largest count maps to 0 and a count of
# zero maps to 1. The size legend below undoes this to display counts again.
dependency_graph_df$importance <- dependency_graph_df$importance / max_downstream_deps
dependency_graph_df$importance <- abs(1 - dependency_graph_df$importance)

# Reduce every column to a plain vector before plotting.
dependency_graph_df <- as.data.frame(lapply(dependency_graph_df, as.vector))

ggplot(dependency_graph_df, aes(x = x, y = y, xend = xend, yend = yend)) +
  geom_nodes(aes(color = n_dependencies), size = 6.5, alpha = 0.4) +
  geom_edges(
    arrow = arrow(length = unit(4, "pt"), type = "closed"),
    color = grey(0.4)
  ) +
  geom_nodelabel_repel(
    aes(label = package, fontface = face, color = n_dependencies),
    box.padding = unit(8, "pt")
  ) +
  geom_nodes(aes(color = n_dependencies, size = 7 * importance)) +
  scale_color_distiller(palette = "Spectral") +
  # Convert the plotted size (7 * inverted share) back into a count.
  scale_size(
    labels = function(x) abs(max_downstream_deps - ceiling(x / 7 * max_downstream_deps))
  ) +
  theme_blank(legend.position = "top") +
  guides(
    size = guide_legend(
      title = "Downstream dependencies",
      title.position = "top",
      title.hjust = 0.5,
      label.position = "bottom",
      label.hjust = 0.5
    ),
    color = guide_colorbar(
      title = "Upstream dependencies",
      title.position = "top",
      title.hjust = 0.5,
      barwidth = unit(130, "pt"),
      barheight = unit(4, "pt")
    )
  )

Citation

How to reference easystats?

  1. Cite specific packages

The most parsimonious approach is to cite only the particular package that helped you, e.g., "using bayestestR (Makowski, Ben-Shachar, & Lüdecke, 2019)". However, as easystats is meant to be an ecosystem, with different people working on its different aspects (some being more "citeable" than others), please consider including also the "main" publication: not available yet.

  2. Cite the whole ecosystem

Want to really help us boost our h-index? Or simply credit the whole network of interconnected aspects of easystats? This can be done with a sentence like the following:

Data processing was carried out with R (R Core Team, 2019) and the easystats ecosystem (Lüdecke, Waggoner, & Makowski, 2019; Makowski, Ben-Shachar, & Lüdecke, 2019)

The corresponding bibtex entries are the following:

@article{ludecke2019insight,
    journal = {Journal of Open Source Software},
    doi = {10.21105/joss.01412},
    issn = {2475-9066},
    number = {38},
    publisher = {The Open Journal},
    title = {insight: A Unified Interface to Access Information from Model Objects in R},
    url = {http://dx.doi.org/10.21105/joss.01412},
    volume = {4},
    author = {Lüdecke, Daniel and Waggoner, Philip and Makowski, Dominique},
    pages = {1412},
    date = {2019-06-25},
    year = {2019},
    month = {6},
    day = {25},
}


@article{makowski2019bayestestr,
    title = {{bayestestR}: {Describing} {Effects} and their {Uncertainty}, {Existence} and {Significance} within the {Bayesian} {Framework}},
    volume = {4},
    issn = {2475-9066},
    shorttitle = {{bayestestR}},
    url = {https://joss.theoj.org/papers/10.21105/joss.01541},
    doi = {10.21105/joss.01541},
    number = {40},
    urldate = {2019-08-13},
    journal = {Journal of Open Source Software},
    author = {Makowski, Dominique and Ben-Shachar, Mattan and Lüdecke, Daniel},
    month = aug,
    year = {2019},
    pages = {1541}
}

Versioning

Package version numbers indicate the following: MAJOR.MINOR.PATCH.DEVELOPMENT. As long as packages are in a more or less rapidly developing and changing state, the major version number is typically 0. Once we think we will have a stable base that will likely not change dramatically or soon, the major version number will be set to 1, and increased for following major changes that probably break the current API. When new features are added or (re)moved, we typically increase the minor version number. Minimal changes or bug fixes only are indicated by increasing the patch version number. Current development versions of our packages (i.e. master branches from GitHub) additionally have a development version number. You typically won't find packages on CRAN with a development version number.

Downloads

library(tidyverse)
library(zoo)
library(cranlogs)
library(rstanarm)
library(see)
library(ggrepel)

# Packages data
# First day of CRAN download counts requested for each package (presumably the
# day of the first CRAN release -- confirm). The order of this vector is also
# the order of the `Package` factor levels set at the end of the pipeline.
first_download_day <- c(
  insight = "2019-02-26",
  bayestestR = "2019-04-08",
  performance = "2019-04-22",
  see = "2019-05-22",
  parameters = "2019-08-26",
  effectsize = "2019-11-15",
  modelbased = "2020-01-12"
)

# One data frame of daily counts per package, tagged with the package name.
downloads <- Map(
  function(pkg_name, from) {
    cranlogs::cran_downloads(package = pkg_name, from = from) %>%
      mutate(Package = pkg_name)
  },
  names(first_download_day),
  first_download_day
)

# Combine all data
data <- do.call(rbind, unname(downloads)) %>%
  group_by(Package) %>%
  mutate(
    cumulative_count = cumsum(count),
    # Despite its name this is a 10-day rolling maximum (plus 10); it is used
    # further down to derive the height at which event labels are drawn.
    average_count = zoo::rollmax(count, 10, fill = 0) + 10,
    # Days elapsed since the first row of the package.
    day_num = as.numeric(date) - min(as.numeric(date)),
    day = weekdays(date),
    month = months(date),
    quarters = quarters(date),
    month_day = lubridate::mday(date)
  ) %>%
  ungroup() %>%
  mutate(Package = fct_relevel(Package, names(first_download_day)))


# Colours used for the two kinds of annotated events.
color_CRAN <- "#607D8B"
color_blog <- "#9C27B0"

# Publications
# One row per release / publication event: date, label, package.
publications <- matrix(
  c(
    "2019-03-05", "CRAN - insight (0.1.0)", "insight",
    "2019-03-29", "CRAN - insight (0.2.0)", "insight",
    "2019-04-09", "CRAN - bayestestR (0.1.0)", "bayestestR",
    "2019-04-23", "JOSS - insight (submission)", "insight",
    "2019-04-24", "CRAN - performance (0.1.0)", "performance",
    "2019-05-11", "CRAN - insight (0.3.0)", "insight",
    "2019-05-24", "CRAN - see (0.1.0)", "see",
    "2019-05-29", "CRAN - bayestestR (0.2.0)", "bayestestR",
    "2019-06-19", "CRAN - see (0.2.0)", "see",
    "2019-06-25", "JOSS - insight (publication)", "insight",
    "2019-07-01", "CRAN - insight (0.4.0)", "insight",
    "2019-08-05", "CRAN - performance (0.3.0)", "performance",
    "2019-08-26", "CRAN - parameters (0.1.0)", "parameters",
    "2019-09-26", "CRAN - parameters (0.2.0)", "parameters",
    "2019-11-15", "CRAN - effectsize (0.0.1)", "effectsize",
    "2020-01-12", "CRAN - modelbased (0.1.0)", "modelbased"
  ),
  ncol = 3,
  byrow = TRUE
)

events <- data.frame(
  date = as.Date(publications[, 1]),
  event = publications[, 2],
  Package = publications[, 3],
  color = color_CRAN,
  stringsAsFactors = FALSE
)

# Blogposts
blog_feed <- tidyRSS::tidyfeed("https://easystats.github.io/blog/categories/r/index.xml")
blogposts <- blog_feed %>%
  filter(item_date_published > "2019-03-29") %>%
  select(date = item_date_published, event = item_title) %>%
  mutate(color = color_blog) %>%
  distinct()

# Hand-written package label for each retained post, applied in reverse order.
# NOTE(review): this needs exactly one label per post, so it has to be
# extended whenever the feed gains a post -- confirm against the current feed.
blogpost_packages <- c(
  "insight", "bayestestR", "bayestestR", "performance", "bayestestR",
  "see", "bayestestR", "bayestestR", "performance", "parameters",
  "parameters", "parameters", "bayestestR"
)
blogposts$Package <- rev(blogpost_packages)


events <- rbind(events, blogposts)

# Full join
# Attach the event columns to the daily download rows (matched on date and
# package); events without a matching download row are kept as extra rows.
data <- data %>%
  full_join(events, by = c("date", "Package"))

# Height of each event label: the largest `average_count` among the rows
# carrying that event.
event_heights <- data %>%
  group_by(event) %>%
  summarise(event_y = max(average_count))

# Rows that only exist in `event_heights` (no date) are dropped again.
events <- events %>%
  full_join(event_heights, by = "event") %>%
  filter(!is.na(date))

# Set packages colours
# Material colour name assigned to each package; the names of this vector
# become the names of `packages_colours`.
package_colour_names <- c(
  insight = "orange",
  bayestestR = "pink",
  performance = "green",
  see = "blue",
  parameters = "purple",
  effectsize = "brown",
  modelbased = "lime"
)

# Look up each colour and drop the name it carries, keeping the package name.
packages_colours <- vapply(
  package_colour_names,
  function(colour_name) unname(see::material_colors(colour_name)),
  character(1)
)

Trend

# TIME TREND
# Daily downloads per package (faint raw line, loess smooth and linear trend),
# annotated with the release / blog events.
data %>%
  group_by(Package) %>%
  # Drop the last row of each package (the most recent day when the rows are
  # in date order) to avoid edge issues. seq_len() is used instead of
  # 1:(n() - 1), which would yield c(1, 0) and keep the row of a one-row group.
  slice(seq_len(n() - 1)) %>%
  ungroup() %>%
  ggplot(aes(x = date, color = Package)) +
  geom_line(aes(y = count), size = 0.5, alpha = 0.1) +
  geom_smooth(
    aes(y = count),
    method = "loess",
    size = 1.75,
    se = FALSE
  ) +
  geom_smooth(
    aes(y = count),
    method = "lm",
    size = 0.75,
    se = FALSE
  ) +
  # geom_linerange(data=events, aes(x = date, ymin = 0, ymax = event_y), colour = events$color, size = .5, linetype="dashed") +
  # geom_label_repel(data=events, aes(label = event, x = date, y = event_y), fill = events$color, colour = "white", segment.colour = events$color) +
  geom_text_repel(
    data = events,
    aes(label = event, x = date, y = event_y),
    colour = events$color,
    segment.colour = events$color,
    size = 3
  ) +
  see::theme_modern() +
  scale_x_date(
    date_breaks = "1 month",
    labels = scales::date_format("%Y-%m")
  ) +
  scale_color_manual(values = packages_colours) +
  xlab("") +
  ylab("Daily CRAN Downloads\n") +
  # na.rm guards against rows added by the event join that carry no count,
  # which would otherwise turn the upper limit into NA.
  coord_cartesian(
    ylim = c(0, max(data$count, na.rm = TRUE) + 100),
    expand = FALSE
  ) +
  scale_y_sqrt()

Cumulative downloads

# Label the cumulative count on the first row of every package and calendar
# month. The period is built from year AND month: grouping by the month name
# alone would merge e.g. February 2019 with February 2020. Labelling by row
# position also avoids blanking a label just because another package happens
# to have the same cumulative count.
data <- data %>%
  group_by(Package, label_period = format(date, "%Y-%m")) %>%
  mutate(
    download_label = replace(cumulative_count, row_number() > 1, NA)
  ) %>%
  ungroup() %>%
  select(-label_period)

ggplot(
  data,
  aes(x = date, y = cumulative_count, label = download_label, color = Package)
) +
  geom_line(size = 0.75) +
  geom_label_repel(show.legend = FALSE) +
  labs(x = "", y = "Cumulative CRAN Downloads", label = NULL) +
  see::theme_modern() +
  scale_x_date(
    date_breaks = "1 month",
    labels = scales::date_format("%Y-%m")
  ) +
  scale_color_manual(values = packages_colours) +
  scale_y_sqrt()

Convention of code-style

The following conventions apply to the easystats-ecosystem, to ensure that function and argument names as well as element names for return-values follow a consistent pattern across all packages.

Importing other packages

Helper-functions

Function names

Argument names

Element / Column names (for returned data frames)

1) First letter of the column name is capital, unless (6) applies (example: Parameter) 2) First letter of nouns is capital, unless (6) applies (example: ROPE_Percentage, Prior_Scale) 3) Using underscore rather than camelCase to separate words (example: CI_high) 4) Multiple words: common/main part first and adjective/specifier/variational part after (example: Median_standardized, ROPE_Percentage) 5) Abbreviations: all uppercase (example: ESS, MCSE, ROPE) 6) Keep conventions for reserved words (example: p, pd, Rhat) 7) Adjectives / verbs: all lower case, unless (1) applies (example: high or low in CI_high or CI_low)

List of functions

# it would be cool to add the title / description for all functions
library(insight)
library(bayestestR)
library(parameters)
library(performance)
library(correlation)
library(modelbased)
library(effectsize)
library(report)

# Print a markdown bullet list of every object exported by the easystats
# packages, each linking to the reference index of its package website.
packages <- c(
  "insight", "bayestestR", "parameters", "performance", "effectsize",
  "correlation", "modelbased", "report", "see"
)

# Build one data frame per package and bind them in a single call, instead of
# growing `all_funs` with rbind() on every loop iteration.
all_funs <- do.call(rbind, lapply(packages, function(package) {
  # Looks the package up on the search path, so it must already be attached.
  name <- ls(paste0("package:", package))

  functions <- paste0(
    "[**`",
    name,
    "`**](https://easystats.github.io/",
    package,
    "/reference/index.html)",
    " *(",
    package,
    ")*"
  )

  data.frame(
    "Functions" = functions,
    "Package" = package,
    "Name" = name
  )
}))

# A name exported by several packages is listed once, under the package that
# comes first in `packages`.
all_funs <- all_funs[!duplicated(all_funs$Name), ]
all_funs <- sort(as.character(all_funs$Functions))
# The leading empty string makes the first entry start with "\n- " as well.
cat(paste0(c("", all_funs), collapse = "\n- "))


tnewman63/easystats documentation built on Jan. 24, 2020, 12:11 a.m.