In neuropsychology/psycho.R: Efficient and Publishing-Oriented Workflow for Psychological Science

library(knitr)

The Data

library(tidyverse)
library(psycho)

df <- psycho::affective %>%  # Load the dataset available in the psycho package
  select(Sex, Life_Satisfaction)  # Select two variables

summary(df)  # Print a summary

The Model

fit <- glm(Sex ~ Life_Satisfaction, data=df, family="binomial")

Inspect the model

analyze(fit)

This shows the Nakagawa's R2 (Nakagawa & Schielzeth, 2013) of the total model (the "explanatory power", as speaking of \% of variance explained hardly makes sense in the context of binomial data). It also returns the intercept (the value of the dependent variable when all variables are 0). It returns the estimations of the parameters, including the coefficient, standart error, confidence interval, z and p value. Critically, it also computes standardized coefficients and uses them as effect size indices, categorized using Cohen 1988's heuristics (by default).

Plot the model

refdata <- psycho::refdata(df, "Life_Satisfaction")

predicted_data <- psycho::get_predicted(fit, newdata=refdata)

# Plot
predicted_data %>% 
  ggplot(aes(y=Sex_Predicted, x=Life_Satisfaction)) +
  geom_line()

Better Plot

predicted_data %>% 
  ggplot(aes(y=Sex_Predicted, x=Life_Satisfaction)) +
  geom_line() +
  geom_ribbon(aes(ymin=Sex_CI_2.5,
                  ymax=Sex_CI_97.5),
                  alpha=0.1) +
  theme_classic() +
  ylab("Probability of being a Male")

refdata <- psycho::refdata(df, "Life_Satisfaction", length.out = 100)

predicted_data <- psycho::get_predicted(fit, newdata=refdata, prob=c(0.8, 0.9, 0.95, 0.99))

predicted_data %>% 
  ggplot(aes(y=Sex_Predicted, x=Life_Satisfaction)) +
  geom_line() +
  geom_ribbon(aes(ymin=Sex_CI_10,
                  ymax=Sex_CI_90),
                  alpha=0.1) +
  geom_ribbon(aes(ymin=Sex_CI_5,
                  ymax=Sex_CI_95),
                  alpha=0.1) +
  geom_ribbon(aes(ymin=Sex_CI_2.5,
                  ymax=Sex_CI_97.5),
                  alpha=0.1) +
    geom_ribbon(aes(ymin=Sex_CI_0.5,
                  ymax=Sex_CI_99.5),
                  alpha=0.1) +
  theme_classic() +
  ylab("Probability of being a Male") +
  xlab("Life Satisfaction")

Mixed-Model Logistic Regression

library(tidyverse)
library(lme4)
library(psycho)

df <- psycho::emotion %>%  # Load the dataset available in the psycho package
  select(Participant_ID, Participant_Sex, Recall, Subjective_Arousal)  # Select two variables

summary(df)  # Print a summary

fit <- lme4::glmer(Recall ~ Subjective_Arousal + (1|Participant_ID), data=df, family="binomial")

analyze(fit)

refdata <- psycho::refdata(df, "Subjective_Arousal")

predicted_data <- psycho::get_predicted(fit, newdata=refdata, prob=c(0.8, 0.9, 0.95, 0.99))

predicted_data %>% 
  ggplot(aes(y=Sex_Median, x=Life_Satisfaction)) +
  geom_line() +
  geom_ribbon(aes(ymin=Sex_CI_10,
                  ymax=Sex_CI_90),
                  alpha=0.1) +
  geom_ribbon(aes(ymin=Sex_CI_5,
                  ymax=Sex_CI_95),
                  alpha=0.1) +
  geom_ribbon(aes(ymin=Sex_CI_2.5,
                  ymax=Sex_CI_97.5),
                  alpha=0.1) +
    geom_ribbon(aes(ymin=Sex_CI_0.5,
                  ymax=Sex_CI_99.5),
                  alpha=0.1) +
  theme_classic() +
  ylab("Probability of being a Male") +
  xlab("Life Satisfaction")

Simulation

library(tidyverse)

simulate_logistic_data <- function(n=1000, noise=0){
   x <- rnorm(n)  # some continuous variables 
   z <- x  # linear combination with a bias
   pr <- 1/(1+exp(-z))  # pass through an inv-logit function
   if(noise != 0){
     pr <- pr + rnorm(n, 0, noise*sd(pr))
     pr[pr > 1] <- 1
     pr[pr < 0] <- 0
   }
   y <- rbinom(n, 1, pr)  # bernoulli response variable
   df <- data.frame(x=x, y=y)
   return(df)
}





noise <- 0
data <- data.frame()
for(n in seq(6, 250)){
  print(n)

  for(i in seq(20)){
      # Generate data
      df <- simulate_logistic_data(n, noise=noise)

      # Change scale randomly
      df$x <- df$x * runif(1, 0, 1000) + runif(1, 0, 1000)

      fit = tryCatch({
        glm(y~x,data=df, family="binomial")
      }, warning = function(w) {
        0
      }, error = function(e) {
        0
      }, message = function(m) {
        0
      })

    if(is.numeric(fit)){
      next
    }

    for(method in c("chen2010", "cohen1988")){
      rez <- psycho::analyze(fit, effsize_rules = method)
      rez <- rez$summary[2,] %>% 
        select(Coef, Coef.std, p, Effect_Size) %>% 
        mutate(Method = method,
               n = n,
               Noise = noise,
               i = paste(i, n, sep="_"))
      data <- rbind(data, rez)
    }
  }
}

data %>% 
  mutate(Coef.std = abs(Coef.std),
         Effect_Size = forcats::fct_rev(Effect_Size)) %>% 
  ggplot(aes(y=Coef.std, x=Effect_Size, fill=Method)) +
  geom_violin() +
  ylab("Standardized Log Odds") +
  ylim(0, 4)

data %>% 
  mutate(Coef.std = abs(Coef.std),
         Effect_Size = forcats::fct_rev(Effect_Size)) %>% 
  ggplot(aes(y=Coef.std, x=n, colour=Effect_Size)) +
  geom_point() +
  ylab("Standardized Log Odds") +
  ylim(0, 4)

Contribute

Of course, these reporting standards should change, depending on new expert recommandations or official guidelines. The goal of this package is to flexibly adaptive to new changes and good practices evolution. Therefore, if you have any advices, opinions or such, we encourage you to either let us know by opening an issue, or even better, try to implement them yourself by contributing to the code.

Credits

This package helped you? Don't forget to cite the various packages you used :)

You can cite psycho as follows:

Makowski, (2018). The psycho Package: An Efficient and Publishing-Oriented Workflow for Psychological Science. Journal of Open Source Software, 3(22), 470. https://doi.org/10.21105/joss.00470