# Chunk defaults: merge code and output into one block, prefix output with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

Programming Suggestion

# Preview of the two-variable line graph; the same chain appears again at the
# end of this file, after the data have been loaded and reshaped.
# NOTE(review): `textline` and `cleanup` are only assigned further down in this
# file, so this chunk fails if the script is run top to bottom — confirm it is
# meant to be display-only.
textline +
  # group means drawn as points
  stat_summary(fun = mean,
               geom = "point") +
  # group means joined by one line per Group
  stat_summary(fun = mean,
               geom = "line", 
               aes(group = Group)) +
  # error bars computed by mean_cl_normal
  stat_summary(fun.data = mean_cl_normal,
               geom = "errorbar",
               width = .2) + 
  xlab("Measurement Time") +
  ylab("Mean Grammar Score") +
  cleanup +
  # legend title, legend entries, and line colours for Group
  scale_color_manual(name = "Texting Option",
                     labels = c("All the texts", "None of the texts"),
                     values = c("Black", "Grey")) +
  # friendlier tick labels on the x axis
  scale_x_discrete(labels = c("Baseline", "Six Months"))

Outline

Working with Files

library(rio)
chickflick <- import("data/ChickFlick.sav")
str(chickflick)

Factor Categorical Variables

table(chickflick$gender)

table(chickflick$film)

How to Factor

# Turn the coded gender column into a labelled factor.
chickflick$gender <- factor(
  chickflick$gender,           # the variable to convert
  levels = c(1, 2),            # codes as they appear in the data
  labels = c("Male", "Female") # label for each code, in the same order
)

table(chickflick$gender)

Data Structure Format

Rearrange Data: Wide to Long

library(reshape) #note: you could also use pivot_longer in tidyverse
cricket <- import("data/Jiminy_Cricket.csv")
head(cricket)

Rearrange Data: Wide to Long

# Reshape cricket from wide to long format.
# Use melt()'s documented argument names: `id.vars` is spelled out rather than
# relying on partial matching of `id`, and `measure.vars` replaces `measured`,
# which is not a melt() argument and was silently ignored.
longcricket <- melt(cricket, # name of dataset
                    id.vars = c("ID", "Strategy"), # columns kept as identifiers
                    measure.vars = c("Success_Pre", "Success_Post")) # columns to stack
# measure.vars is optional: when omitted, every non-id column is melted
head(longcricket)

Rearrange Data: Wide to Long

# Inspect the current names of columns 3 and 4, then replace them.
names(longcricket)[3:4]
names(longcricket)[3:4] <- c("Time", "Score")

The Art of Presenting Data

Why is this Graph Bad?

knitr::include_graphics("pictures/graphs/badgraph.png")

Other Bad Design Choices

Why is this Graph Better?

knitr::include_graphics("pictures/graphs/bettergraph.png")

Do Not Deceive the Reader!

knitr::include_graphics("pictures/graphs/deception.png")

Plotting in R

library(ggplot2)

Working with ggplot2

# Template only: `dataset`, `x_axis`, `y_axis`, and `legend_var` are placeholders.
myGraph <- ggplot(
  data = dataset,
  mapping = aes(
    x = x_axis,
    y = y_axis,
    color = legend_var,
    fill = legend_var
  )
)

Working with ggplot2

# Template, part 2: layers and axis labels are added to the base plot with `+`.
myGraph +
  geom_bar() +
  geom_point() +
  labs(x = "X Axis Label", y = "Y Axis Label")

Histograms

Histogram: Example

# Base plot for the cricket histograms; only the x aesthetic is mapped.
crickethist <- ggplot(data = cricket, mapping = aes(x = Success_Pre))
crickethist # print the base plot before any layer is added

Histogram: Example

# Add a histogram layer with the default binning.
crickethist + geom_histogram()

Histogram: Example

# Same histogram, with an explicit bin width of 1.
crickethist + geom_histogram(binwidth = 1)

Histogram: Example

# Same histogram, now with explicit outline and fill colours.
crickethist +
  geom_histogram(
    binwidth = 1,
    color = "purple",
    fill = "magenta"
  )

Histogram: Example

# Finished cricket histogram: coloured bars plus readable axis labels.
crickethist +
  geom_histogram(
    binwidth = 1,
    color = "purple",
    fill = "magenta"
  ) +
  labs(x = "Success Pre Test", y = "Frequency")

Histogram: Example 2

festival <- import("data/festival.csv")
str(festival)

Histogram: Example 2

# Histogram of the day1 column of the festival data, using a built-in theme.
festivalhist <- ggplot(data = festival, mapping = aes(x = day1))
festivalhist +
  geom_histogram(binwidth = 1, color = "blue") +
  labs(x = "Day 1 of Festival Hygiene", y = "Frequency") +
  theme_bw() # theme_classic() is another good option

Focus on these Facets

Clean Up?

# Reusable theme tweaks that are added to the plots below with `+ cleanup`.
cleanup <- theme(
  # strip grid lines and the panel background
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.background = element_blank(),
  # draw both axis lines in black
  axis.line.x = element_line(color = "black"),
  axis.line.y = element_line(color = "black"),
  # white background behind the legend keys
  legend.key = element_rect(fill = "white"),
  # larger text throughout
  text = element_text(size = 15)
)

Clean Up?

# Festival histogram again, this time with the custom cleanup theme.
festivalhist +
  geom_histogram(binwidth = 1, color = "blue") +
  labs(x = "Day 1 of Festival Hygiene", y = "Frequency") +
  cleanup

Scatterplots

Scatterplots: Example

exam <- import("data/Exam_Anxiety.csv")
str(exam)

Scatterplots: Example

table(exam$Gender) # counts before recoding
# Turn the coded Gender column into a labelled factor.
exam$Gender <- factor(
  exam$Gender,
  levels = c(1, 2),
  labels = c("Male", "Female")
)
table(exam$Gender) # counts after recoding

Simple Scatterplot

# Base scatterplot: Anxiety on the x axis, Exam on the y axis.
scatter <- ggplot(data = exam, mapping = aes(x = Anxiety, y = Exam))
scatter +
  geom_point() +
  labs(x = "Anxiety Score", y = "Exam Score") +
  cleanup

Simple Scatterplot with Regression Line

# Scatterplot with a linear-model smoother (black line, blue band).
scatter +
  geom_point() +
  geom_smooth(method = "lm", color = "black", fill = "blue") +
  labs(x = "Anxiety Score", y = "Exam Score") +
  cleanup

Grouped Scatterplot

Grouped Scatterplot with Regression Line

# Grouped scatterplot: Gender is mapped to both color and fill (why both?).
scatter2 <- ggplot(
  data = exam,
  mapping = aes(x = Anxiety, y = Exam, color = Gender, fill = Gender)
)
scatter2 +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(x = "Anxiety Score", y = "Exam Score") +
  cleanup +
  # both scales share the same legend title and entry labels
  scale_fill_manual(
    name = "Gender of Participant",
    labels = c("Men", "Women"),
    values = c("purple", "grey")
  ) +
  scale_color_manual(
    name = "Gender of Participant",
    labels = c("Men", "Women"),
    values = c("purple", "grey10")
  )

GGally for Multiple Visualization

library(GGally)
# Pairwise plot matrix of the exam data.
ggpairs(
  data = exam[, -1], # drop the first column (no participant variable)
  title = "Exam Anxiety, Scores, and Gender"
)

Bar Graphs

Bar Graph: One Independent Variable

Bar Chart: One Independent Variable

str(chickflick) # gender was already converted to a factor above
# Turn the coded film column into a labelled factor.
chickflick$film <- factor(
  chickflick$film,
  levels = c(1, 2),
  labels = c("Bridget Jones", "Memento")
)

Bar Chart: One Independent Variable Example

# Bar chart of mean arousal per film: white bars with a black outline.
chickbar <- ggplot(data = chickflick, mapping = aes(x = film, y = arousal))
chickbar +
  stat_summary(geom = "bar", fun = mean, color = "Black", fill = "White") +
  cleanup

Bar Chart: One Independent Variable Example

# Same bar chart with error bars layered on top of the bars.
chickbar +
  stat_summary(geom = "bar", fun = mean, color = "Black", fill = "White") +
  stat_summary(
    geom = "errorbar",
    fun.data = mean_cl_normal,
    width = 0.2,
    position = position_dodge(width = 0.9)
  ) +
  cleanup

Bar Chart: One Independent Variable Example

# Finished one-variable bar chart: bars, error bars, axis titles, tick labels.
chickbar +
  stat_summary(geom = "bar", fun = mean, color = "Black", fill = "White") +
  stat_summary(
    geom = "errorbar",
    fun.data = mean_cl_normal,
    width = 0.2,
    position = position_dodge(width = 0.9)
  ) +
  labs(x = "Movie Watched by Participant", y = "Arousal Level") +
  cleanup +
  # replace the factor labels on the x axis
  scale_x_discrete(labels = c("Girl Film", "Guy Film"))

Bar Chart: Two Independent Variables

# Two-variable bar chart: film on the x axis, gender as the bar fill.
chickbar2 <- ggplot(
  data = chickflick,
  mapping = aes(x = film, y = arousal, fill = gender)
)
chickbar2 +
  # bars for each gender sit side by side within a film
  stat_summary(geom = "bar", fun = mean, position = "dodge") +
  # error bars are dodged as well so they line up with their bars
  stat_summary(
    geom = "errorbar",
    fun.data = mean_cl_normal,
    width = 0.2,
    position = position_dodge(width = 0.9)
  ) +
  labs(x = "Film Watched", y = "Arousal Level") +
  cleanup +
  scale_fill_manual(
    name = "Gender of Participant",
    labels = c("Boys", "Girls"),
    values = c("Gray30", "Gray")
  )

Line Graphs

Line Graphs: One Independent Variable

hiccups <- import("data/Hiccups.csv")
str(hiccups)

Line Graphs: One Independent Variable

# Reshape the hiccups data to long format.
# NOTE(review): no id variables are supplied, and `measured` does not appear to
# be a formal argument of reshape's melt() (the documented name is
# `measure.vars`), so it is probably swallowed by `...` and ignored. Confirm
# the listed column names match the file before switching to `measure.vars`.
longhiccups <- melt(hiccups, 
                    measured = c("Baseline", "Tongue", "Carotid", "Other"))
str(longhiccups)
# Give the two columns of the melted data readable names for plotting.
colnames(longhiccups) <- c("Intervention", "Hiccups")

Line Graphs: One Independent Variable

# One-variable line graph of mean hiccups per intervention.
hiccupline <- ggplot(
  data = longhiccups,
  mapping = aes(x = Intervention, y = Hiccups)
)
hiccupline +
  # adds the points
  stat_summary(geom = "point", fun = mean) +
  # adds the line; group = 1 is needed to map the line to the dots
  stat_summary(geom = "line", fun = mean, mapping = aes(group = 1)) +
  # adds the error bars
  stat_summary(geom = "errorbar", fun.data = mean_cl_normal, width = 0.2) +
  labs(x = "Intervention Type", y = "Number of Hiccups") +
  cleanup

Line Graphs: Two Independent Variables

Line Graphs: Two Independent Variables

texting <- import("data/Texting.xlsx")
str(texting)

Line Graphs: Two Independent Variables

# Label the coded Group column before reshaping.
texting$Group <- factor(texting$Group,
                        levels = c(1, 2),
                        labels = c("Texting Allowed", "No Texting Allowed"))
# Wide to long: stack the two measurement columns, keeping Group as identifier.
# `id.vars` / `measure.vars` are melt()'s documented argument names; the
# previous `measured =` was not a melt() argument and was silently ignored,
# and `id =` only worked through partial matching.
longtexting <- melt(texting,
                    id.vars = "Group",
                    measure.vars = c("Baseline", "Six_months"))
str(longtexting)
# Rename the id column and the two melted columns for plotting.
colnames(longtexting) <- c("Group", "Time", "Grammar_Score")

Line Graphs: Two Independent Variables

# Two-variable line graph: Time on the x axis, one coloured line per Group.
textline <- ggplot(
  data = longtexting,
  mapping = aes(x = Time, y = Grammar_Score, color = Group)
)
textline +
  stat_summary(geom = "point", fun = mean) +
  # Group is the variable name; grouping by it draws one line per group
  stat_summary(geom = "line", fun = mean, mapping = aes(group = Group)) +
  stat_summary(geom = "errorbar", fun.data = mean_cl_normal, width = 0.2) +
  labs(x = "Measurement Time", y = "Mean Grammar Score") +
  cleanup +
  scale_color_manual(
    name = "Texting Option",
    labels = c("All the texts", "None of the texts"),
    values = c("Black", "Grey")
  ) +
  scale_x_discrete(labels = c("Baseline", "Six Months"))

Summary



doomlab/learnSTATS documentation built on June 9, 2022, 12:54 a.m.