# Global chunk options: echo the source of every chunk, print results without
# a comment prefix, and keep knitting when a chunk raises an error.
knitr::opts_chunk$set(
  error = TRUE,
  comment = NA,
  echo = TRUE
)

Loading and preprocessing the data

We start by downloading the raw data from the link provided by the instructor. The function downloadZip downloads the zip file, unpacks it, and places its contents in the specified directory.

Downloading and unpacking raw data file

# Download a zip archive and unpack it into a directory.
#
# Args:
#   fileUrl: URL of the zip archive; a single non-empty character string.
#   outDir:  Directory the archive is extracted into. Defaults to "./data".
#
# Returns:
#   The value of unzip() for the downloaded archive.
#
# Raises:
#   An error when fileUrl is not a single non-empty string, or when the
#   download or extraction fails.
downloadZip <- function(fileUrl, outDir="./data") {
  # Fail early with a clear message instead of a cryptic download error.
  if (!is.character(fileUrl) || length(fileUrl) != 1L ||
      is.na(fileUrl) || !nzchar(fileUrl)) {
    stop("fileUrl must be a single non-empty character string", call. = FALSE)
  }
  temp <- tempfile()
  # Remove the downloaded archive when the function exits, even on error.
  on.exit(unlink(temp), add = TRUE)
  download.file(fileUrl, temp, mode = "wb")
  unzip(temp, exdir = outDir)
}
# Source URL of the zipped raw data.
# NOTE(review): the URL is empty here -- confirm the intended download link.
fileUrl <- ""
# Folder that receives the raw data.
outDir <- "../inst/extdata"
# Download the archive and unpack it into outDir.
downloadZip(fileUrl = fileUrl, outDir = outDir)

Create the RData file

More RData files may be generated during this assignment. They will be placed in the data folder.

# Locate the installed package and the raw-data folder inside it
baseLoc <- system.file(package = "RepDataPeerAssessment1")
extPath <- file.path(baseLoc, "extdata")
# Read the raw CSV and save it as an RData file
activity <- read.csv(file.path(extPath, "activity.csv"))
save(activity, file = "../data/activity.RData")
# this will work after at least a data file has been saved to this folder
system.file(file = "data", package = "RepDataPeerAssessment1")

Basic sanity check


Convert the variable date from factor to Date

# Parse the date column into Date objects, then refresh the stored copy
activity[["date"]] <- as.Date(activity[["date"]])
save(activity, file = "../data/activity.RData")

What is mean total number of steps taken per day?

We will ignore the NAs in this part of the assignment.

# Flag the rows without any missing value, then keep only those rows
complete <- complete.cases(activity)
activity.cases <- activity[which(complete), ]

Histogram of total number of steps each day

# The histogram must show the TOTAL steps per day, so sum the steps of each
# date first; plotting activity.cases$steps directly would show the
# distribution of single 5-minute counts instead.
daily.total <- aggregate(steps ~ date, data = activity.cases, FUN = sum)
# Same daily totals at two bin resolutions
hist(daily.total$steps, breaks = 60,
     main = "Total number of steps per day", xlab = "Steps per day")
hist(daily.total$steps, breaks = 30,
     main = "Total number of steps per day", xlab = "Steps per day")

Find the mean and the median total number of steps per day


What is the average daily activity pattern?

Time series plot (day vs avg. steps)

Make a time series plot (i.e. type = "l") of the 5-minute interval (x-axis) and the average number of steps taken, averaged across all days (y-axis).

# Average the steps of each 5-minute interval across all days before plotting;
# drawing the raw rows would connect every observation of every day in one line.
byInterval.steps <- aggregate(steps ~ interval, data = activity.cases,
                              FUN = mean)
plot(byInterval.steps$interval, byInterval.steps$steps, type = "l",
     xlab = "5-minute interval", ylab = "Average number of steps")

# Mean number of steps for each day.
# The grouping column is `date` (lower case); `activity.cases$Date` does not
# match any column, so aggregate() had nothing valid to group by.
byDate.steps <- aggregate(activity.cases$steps,
                          by = list(activity.cases$date), mean)
# rename the variable to something meaningful
names(byDate.steps) <- c("Day", "mean.steps")
plot(byDate.steps$Day, byDate.steps$mean.steps, type = "l")

Find day and interval with maximum number of steps

Which 5-minute interval, on average across all the days in the dataset, contains the maximum number of steps?

# Largest step count recorded in a single interval, and every row reaching it
max.steps <- max(activity.cases$steps)
index <- which(activity.cases$steps == max.steps)
whole_row <- activity.cases[index, ]
# activity.cases$interval[which(activity.cases$steps == max(activity.cases$steps))]
# Find max steps per day and plot.
# The grouping column is `date` (lower case); `activity.cases$Date` does not
# match any column, so aggregate() had nothing valid to group by.
byDate.steps.max <- aggregate(activity.cases$steps,
                              by = list(activity.cases$date), max)
names(byDate.steps.max) <- c("Day", "steps.max")
plot(byDate.steps.max$Day, byDate.steps.max$steps.max, type = "l")

Imputing missing values

Are there differences in activity patterns between weekdays and weekends?


