# Global chunk options for the document: echo the source of each chunk,
# print results without a comment prefix, and keep knitting past errors.
knitr::opts_chunk$set(
  error = TRUE,
  comment = NA,
  echo = TRUE
)
We start by downloading the raw data using the link provided by the instructor. We use a function that downloads the zip file, unpacks it and places it in the indicated directory. The function is called `downloadZip`.
#' Download a zip archive and unpack it
#'
#' @param fileUrl URL of the zip archive; passed to `download.file()`.
#' @param outDir Directory the archive is extracted into; passed to `unzip()`
#'   as `exdir`.
#' @return The value of `unzip()`, i.e. the paths of the extracted files.
downloadZip <- function(fileUrl, outDir = "./data") {
  temp <- tempfile(fileext = ".zip")
  # Always remove the downloaded archive, even when a later step fails.
  on.exit(unlink(temp), add = TRUE)

  # mode = "wb" writes the archive in binary mode so it is not corrupted.
  status <- download.file(fileUrl, temp, mode = "wb")
  if (status != 0) {
    stop("Download failed (status ", status, "): ", fileUrl, call. = FALSE)
  }

  unzip(temp, exdir = outDir)
}
# Fetch the activity archive and unpack it into the raw-data folder.
outDir <- "../inst/extdata"  # folder for raw data
fileUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2Factivity.zip"
downloadZip(fileUrl = fileUrl, outDir = outDir)  # download and unpack zip file
More RData files may be generated during this assignment. They will be placed in the `data` folder.
# Resolve the installed package's base folder and, below it, the folder
# holding the raw data; both paths are printed for inspection.
baseLoc <- system.file(package = "RepDataPeerAssessment1")
extPath <- file.path(baseLoc, "extdata")
baseLoc
extPath
# Read the raw CSV from the raw-data folder and save it as an .RData file.
# file.path() builds the path with the platform's separator, matching how
# extPath itself is built.
activity <- read.csv(file.path(extPath, "activity.csv"))
save(activity, file = "../data/activity.RData")
rm(activity)  # drop the in-memory copy once it has been written to disk
# Path of the package's data folder.
# this will work after at least a data file has been saved to this folder
system.file("data", package = "RepDataPeerAssessment1")

# Load the packaged data set and take a first look at it.
library("RepDataPeerAssessment1")
data(activity)
head(activity, n = 6L)

# Dimensions, column names, structure and per-column summary.
dim(activity)
colnames(activity)
str(activity)
summary(activity)
# Convert the `date` column from factor (as.factor) to Date (as.Date),
# store the updated data set again and confirm the new column class.
activity$date <- as.Date(activity$date)
save(activity, file = "../data/activity.RData")
str(activity)
We will ignore the NAs in this part of the assignment.
# Flag the rows without any missing value and keep only those.
complete <- complete.cases(activity)
activity.cases <- activity[complete, , drop = FALSE]

# Histogram of `steps` at two bin resolutions.
hist(activity.cases$steps, breaks = 60)
hist(activity.cases$steps, breaks = 30)

# Mean and median of `steps` over the complete rows.
with(activity.cases, mean(steps))
with(activity.cases, median(steps))
Make a time series plot (i.e. `type = "l"`) of the 5-minute interval (x-axis) and the average number of steps taken, averaged across all days (y-axis).
# Average the steps over all days for each 5-minute interval, then plot the
# interval (x-axis) against that average (y-axis) as a line. Plotting the raw
# rows instead would draw every day's observations on top of each other.
byInterval.steps <- aggregate(steps ~ interval, data = activity.cases,
                              FUN = mean)
names(byInterval.steps) <- c("interval", "mean.steps")
plot(byInterval.steps$interval, byInterval.steps$mean.steps, type = "l")
# Mean number of steps per day, plotted as a line over the days.
# The column is named `date` (lower case); `$Date` returns NULL, which makes
# aggregate() fail.
byDate.steps <- aggregate(activity.cases$steps,
                          by = list(activity.cases$date), FUN = mean)
# rename the variable to something meaningful
names(byDate.steps) <- c("Day", "mean.steps")
plot(byDate.steps$Day, byDate.steps$mean.steps, type = "l")
Which 5-minute interval, on average across all the days in the dataset, contains the maximum number of steps?
# Largest single step count in the data and the row(s) where it occurs.
max.steps <- max(activity.cases$steps)
max.steps
index <- which(activity.cases$steps == max.steps)
whole_row <- activity.cases[index, ]
whole_row
# activity.cases$interval[which(activity.cases$steps == max(activity.cases$steps))]

# The question asks for the interval with the highest *average* across all
# days, so average per interval first and then pick the largest mean.
interval.means <- aggregate(steps ~ interval, data = activity.cases,
                            FUN = mean)
interval.means[which.max(interval.means$steps), ]
# Find max steps per day and plot
# The column is named `date` (lower case); `$Date` returns NULL, which makes
# aggregate() fail.
byDate.steps.max <- aggregate(activity.cases$steps,
                              by = list(activity.cases$date), FUN = max)
names(byDate.steps.max) <- c("Day", "steps.max")
plot(byDate.steps.max$Day, byDate.steps.max$steps.max, type = "l")
The figure sizes have been customised so that you can easily put two images side-by-side.
# Demo figure: plots the integers 1 to 10 against their index.
plot(1:10)
You can write math expressions, e.g. $Y = X\beta + \epsilon$, footnotes^[A footnote here.], and tables, e.g. using `knitr::kable()`.
# Render the first ten rows of mtcars as a table.
knitr::kable(head(mtcars, n = 10))
Also a quote using `>`:
"He who gives up [code] safety for [code] speed deserves neither." (via)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.