# Ask knitr to emit tables in HTML format by default.
options(knitr.table.format = "html")

# Chunk defaults for the whole document.
# `results` was previously "as-is", which is not one of the values knitr
# documents for this option ("markup", "asis", "hold", "hide"). "markup" is the
# documented default.
# NOTE(review): assumes the unrecognised value was being rendered like
# "markup" -- confirm the knitted output is unchanged.
knitr::opts_chunk$set(
  echo = TRUE,
  results = "markup",
  collapse = TRUE,
  warning = FALSE,
  fig.width = 12,
  fig.height = 8
)

Demos

1 Introduction

The following R script provides some examples of RplotterPkg::scatter_plot().

library(ggplot2)
library(patchwork)
library(magrittr)
library(purrr)
library(scales)
library(dplyr)
library(modelr)
library(tidyr)
library(RplotterPkg)

2 Fit and plot a linear model for the sim1 dataset

# Inspect the columns of the sim1 data set before modelling it.
dplyr::glimpse(sim1)
Task: Define a linear model for sim1.
# Fit y as a linear function of x on sim1, then print the fit summary.
model_indep_sim1 <- lm(formula = y ~ x, data = sim1)
summary(model_indep_sim1)
Task: Compute the model_indep_sim1 predictions.
# Append the model_indep_sim1 predictions to sim1 (plotted later via the
# "pred" column) and inspect the result.
sim1_pred_df <- sim1 %>%
  modelr::add_predictions(model_indep_sim1)
dplyr::glimpse(sim1_pred_df)
Task: Plot both x vs y along with the pred values as a connected line.
# Scatter of y against x for sim1, with the "pred" column passed as the
# connected-line variable.
plot_sim1_pred <- scatter_plot(
  # data and aesthetic mappings
  df = sim1_pred_df,
  aes_x = "x",
  aes_y = "y",
  connect = "pred",
  # labelling
  title = "Linear line fit for sim1 data",
  subtitle = "model: lm(y ~ x)",
  # axis scaling
  x_limits = c(1, 10, 1),
  y_limits = c(0, 30, 2),
  # point appearance
  pts_color = "blue",
  pts_size = 2,
  pts_alpha = 0.4
)
plot_sim1_pred

2 Residuals from data set sim1

Task: Compute residuals of the linear model model_indep_sim1.
# Append the residuals of model_indep_sim1 to sim1 (plotted later via the
# "resid" column) and inspect the result.
residuals_sim1_df <- modelr::add_residuals(
  data = sim1,
  model = model_indep_sim1
)
dplyr::glimpse(residuals_sim1_df)
Task: Plot x vs resid using scatter_plot().
# Scatter of the sim1 residuals against x.
plot_sim1_resid <- scatter_plot(
  # data and aesthetic mappings
  df = residuals_sim1_df,
  aes_x = "x",
  aes_y = "resid",
  # labelling
  title = "Residuals for sim1 data",
  subtitle = "model: lm(y ~ x)",
  y_label = "Residuals",
  # axis scaling
  x_limits = c(1, 10, 1),
  y_limits = c(-5, 5, 1),
  # point appearance
  pts_color = "blue",
  pts_size = 2,
  pts_alpha = 0.4
)
plot_sim1_resid

3 Residuals from data set sim3 where there is a factor

# Inspect the columns of the sim3 data set before modelling it.
dplyr::glimpse(sim3)

x1 and x2 are independent variables where x2 is a factor variable.

Task: Define a linear model for sim3.
# Fit y on x1 and x2 without an interaction term, then print the fit summary.
model_indep_sim3 <- lm(formula = y ~ x1 + x2, data = sim3)
summary(model_indep_sim3)
Task: Compute residuals of the linear model model_indep_sim3.
# Append the residuals of model_indep_sim3 to sim3 (plotted later via the
# "resid" column) and inspect the result.
residuals_sim3_df <- modelr::add_residuals(
  data = sim3,
  model = model_indep_sim3
)
dplyr::glimpse(residuals_sim3_df)
Task: Plot x1 vs resid for different levels of x2.
# Scatter of the sim3 residuals against x1 in a single plot, with x2 as the
# factor variable and one colour supplied per x2 level.
plot_sim3_resid <- scatter_plot(
  # data and aesthetic mappings
  df = residuals_sim3_df,
  aes_x = "x1",
  aes_y = "resid",
  # factor handling
  factor_var = "x2",
  factor_levels = levels(residuals_sim3_df[["x2"]]),
  factor_colors = c("green", "red", "blue", "orange"),
  # labelling
  title = "Residuals across x2 levels for sim3 data",
  subtitle = "model: lm(y ~ x1 + x2)",
  y_label = "Residuals",
  # axis scaling
  x_limits = c(1, 10, 1),
  y_limits = c(-5, 5, 1),
  # point appearance
  pts_size = 1.5,
  pts_alpha = 0.5
)
plot_sim3_resid
Task: Plot x1 vs resid for different levels of x2 using panels for each level.
# Same residual plot as above, but with factor_level_panels = TRUE so that
# each x2 level is requested as its own panel.
plot_sim3_panels_resid <- scatter_plot(
  # data and aesthetic mappings
  df = residuals_sim3_df,
  aes_x = "x1",
  aes_y = "resid",
  # factor handling
  factor_var = "x2",
  factor_levels = levels(residuals_sim3_df[["x2"]]),
  factor_level_panels = TRUE,
  # labelling
  title = "Residuals across x2 levels for sim3 data",
  subtitle = "model: lm(y ~ x1 + x2)",
  y_label = "Residuals",
  # axis scaling
  x_limits = c(1, 10, 1),
  y_limits = c(-5, 5, 1)
)
plot_sim3_panels_resid

4 Compare two models from data set sim3

Task: Define two linear models for the sim3 data set: an independent (additive) model and an interaction model.

# Additive model: x1 and x2 enter as separate terms.
model_indep_sim3 <- lm(formula = y ~ x1 + x2, data = sim3)
# Interaction model: x1 * x2 also includes the x1:x2 interaction.
model_inter_sim3 <- lm(formula = y ~ x1 * x2, data = sim3)
Task: Compute the residuals of both models from sim3
# Compute the residuals of both sim3 models in one call and inspect the
# result; the columns are later referenced by the model object names.
residuals_models_sim3_df <- modelr::spread_residuals(
  sim3,
  model_indep_sim3,
  model_inter_sim3
)
dplyr::glimpse(residuals_models_sim3_df)
Task: Convert the residual columns of residuals_models_sim3_df to a 'long' column.
# Stack the two per-model residual columns into long form: "model" holds the
# source column name and "resid" holds the residual value.
residuals_models_sim3_df <- residuals_models_sim3_df %>%
  tidyr::pivot_longer(
    cols = c("model_indep_sim3", "model_inter_sim3"),
    names_to = "model",
    values_to = "resid"
  )
dplyr::glimpse(residuals_models_sim3_df)
Task: Plot resid vs y for both models.
# Residuals of both sim3 models plotted against y, with factor_level_panels
# = TRUE so that each model is requested as its own panel.
# NOTE(review): aes_x is "y" while x_limits starts at 0 -- confirm that no y
# values in sim3 fall outside the 0..10 range.
plot_sim3_indep_inter <- scatter_plot(
  # data and aesthetic mappings
  df = residuals_models_sim3_df,
  aes_x = "y",
  aes_y = "resid",
  # factor handling: "model" is a character column, so derive its levels
  factor_var = "model",
  factor_levels = levels(factor(residuals_models_sim3_df[["model"]])),
  factor_level_panels = TRUE,
  # labelling
  title = "Residuals of linear models for sim3 data",
  subtitle = "models: lm(y ~ x1 + x2), lm(y ~ x1 * x2)",
  y_label = "Residuals",
  # axis scaling
  x_limits = c(0, 10, 1),
  y_limits = c(-5, 5, 1)
)
plot_sim3_indep_inter


deandevl/R_plotter_pkg documentation built on Dec. 19, 2019, 12:03 a.m.