# Set the default knitr chunk options used throughout this document
knitr::opts_chunk$set(
  comment = NA,
  fig.width = 6,
  fig.height = 6,
  echo = TRUE,
  eval = TRUE
)
Here is a link to the lecture slides for this session: LINK
In this practical you'll get started with R. By the end of this practical you will:
For this practical you will go through an existing analysis script chunk by chunk to experience how programming and analysing in R works. The idea is that you go through the code, copy the code chunks to the script editor, send the code to the console, and evaluate what happens.
While you are at it, try to practice two very useful shortcuts: (1) cmd + enter (Mac)
or ctrl + enter (Windows)
for running the current line in the console. (2) cmd + shift + p (Mac)
or ctrl + shift + p (Windows)
for running the same block of code again in the console. The latter is really helpful because you can rerun a chunk of code after you have made changes to it.
All of the code used in this tutorial is based on basic R functions. While they are already powerful, we will later in the course introduce you to more modern options for several of the steps.
# Install and load the yarrr package
# Installation is only needed once, so skip it when the package is already
# available instead of re-downloading it every time the script is run.
if (!requireNamespace("yarrr", quietly = TRUE)) {
  install.packages("yarrr")
}
library(yarrr)
# Get help for pirates data
?pirates

# Print the first few rows of the dataset
head(pirates)

# Show the structure of the dataset
str(pirates)

# Show the entire dataset in a new window
View(pirates)
# What is the mean age?
mean(pirates$age)

# What was the height of the tallest pirate?
max(pirates$height)

# How many pirates are there of each sex?
table(pirates$sex)
# The formula is passed positionally: since R 4.2.0 the first argument of
# aggregate()'s formula method is named `x`, so `formula = ...` no longer
# works there, while an unnamed formula works in every R version.

# What was the mean age for each sex?
aggregate(age ~ sex,
          data = pirates,
          FUN = mean)

# What is the median age of pirates for each combination of sex and headband?
aggregate(age ~ sex + headband,
          data = pirates,
          FUN = median)
# --- A default histogram of pirate ages

hist(x = pirates$age)

# --- A customized histogram of pirate ages

hist(x = pirates$age,
     main = "Distribution of pirate ages",
     col = "skyblue",
     border = "white",
     xlab = "Age",
     ylim = c(0, 400))

# Add mean label
# (paste() already separates its arguments with a space)
text(x = mean(pirates$age), y = 375,
     labels = paste("Mean =", round(mean(pirates$age), 2)))

# Add dashed line at mean
# NOTE(review): the original call was cut off after `y0 = 0,`. The end point
# and line type below were reconstructed (vertical line ending just below the
# label at y = 375, lty = 2 for "dashed") -- confirm against the source script.
segments(x0 = mean(pirates$age), y0 = 0,
         x1 = mean(pirates$age), y1 = 360,
         lty = 2)

# ---- Overlapping histograms of pirate ages for females and males

# Start with the female data
hist(x = pirates$age[pirates$sex == "female"],
     main = "Distribution of pirate ages by sex",
     col = transparent("red", .2),
     border = "white",
     xlab = "Age",
     breaks = seq(0, 50, 2),
     probability = TRUE,
     ylab = "",
     yaxt = "n")

# Add male data
hist(x = pirates$age[pirates$sex == "male"],
     add = TRUE,
     probability = TRUE,
     border = "white",
     breaks = seq(0, 50, 2),
     col = transparent("skyblue", .5))

# Add the legend
legend(x = 40,
       y = .05,
       col = c("red", "skyblue"),
       legend = c("Female", "Male"),
       pch = 16,
       bty = "n")
# --- A simple scatterplot of pirate height and weight

plot(x = pirates$height,
     y = pirates$weight,
     xlab = "Height (cm)",
     ylab = "Weight (kg)")

# --- A fancier scatterplot of the same data with some additional arguments

# Create main plot
plot(x = pirates$height,
     y = pirates$weight,
     main = "My first scatterplot of pirate data!",
     xlab = "Height (in cm)",
     ylab = "Weight (in kg)",
     pch = 16,            # Filled circles
     col = gray(0, .1))   # Transparent gray

# Add gridlines
grid()

# Create a linear regression model
model <- lm(formula = weight ~ height,
            data = pirates)

# Add regression to plot
abline(model,
       col = "blue",
       lty = 2)
# --- Look at all the palettes from piratepal()

piratepal()

# Look at the basel palette in detail
piratepal(palette = "basel", plot.result = TRUE)

# --- Scatterplot of pirate height and weight using the pony palette

my.cols <- piratepal(palette = "pony",
                     trans = .2,
                     length.out = nrow(pirates))

# Create the plot
plot(x = pirates$height,
     y = pirates$weight,
     main = "Random scatterplot with My Little Pony Colors",
     xlab = "Pony height",
     ylab = "Pony weight",
     pch = 21,        # Round symbols with borders
     col = "white",   # White border
     bg = my.cols,    # Random colors
     bty = "n"        # No plot border
)

# Add gridlines
grid()
# --- Barplot of mean height by favorite.pirate

# Calculate mean height for each favorite.pirate
pirate.heights <- aggregate(height ~ favorite.pirate,
                            data = pirates,
                            FUN = mean)

barplot(pirate.heights$height,
        main = "Barplot of mean height by favorite pirate",
        names.arg = pirate.heights$favorite.pirate)

# --- Same, but with customizations

barplot(pirate.heights$height,
        ylim = c(0, 200),
        ylab = "Pirate Height (in cm)",
        main = "Barplot of mean height by favorite pirate",
        names.arg = pirate.heights$favorite.pirate,
        col = piratepal("basel", trans = .2))

# Horizontal reference lines every 25 units
abline(h = seq(0, 200, 25),
       lty = 3,
       lwd = c(1, .5))
# Do pirates with eyepatches have longer beards than those without eyepatches?
t.test(formula = beard.length ~ eyepatch,
       data = pirates,
       alternative = "two.sided")

# ANOVA on beard.length as a function of sex and college

# Run the ANOVA
beard.aov <- aov(formula = beard.length ~ sex + college,
                 data = pirates)

# Print summary results
summary(beard.aov)
# Regression analysis showing if age, weight, and tattoos predict how many
# treasure chests a pirate has found

# Run the regression
chests.lm <- lm(formula = tchests ~ age + weight + tattoos,
                data = pirates)

# Print summary results
summary(chests.lm)
For more details on all steps of data analysis check out Hadley Wickham's R for Data Science.
For more on pirates and data analysis check out the respective chapters in YaRrr! The Pirate's Guide to R YaRrr! Chapter Link
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.