# Global chunk options: show the source code, print output without a comment
# prefix, and keep knitting when a chunk raises an error.
knitr::opts_chunk$set(
  echo = TRUE,
  comment = NA,
  error = TRUE
)

Loading and preprocessing the data

We start by downloading the raw data using the link provided by the instructor. We use a function that will download the zip file, unpack it and place it in an indicated directory. The function is called downloadZip.

Downloading and unpacking raw data file

#' Download a zip archive and unpack it
#'
#' @param fileUrl URL of the zip archive to download.
#' @param outDir Directory the archive is extracted into.
#' @return The result of `unzip()` (the paths of the extracted files).
downloadZip <- function(fileUrl, outDir = "./data") {
  temp <- tempfile()
  # Remove the downloaded archive on exit, even when a later step fails.
  on.exit(unlink(temp), add = TRUE)
  # Binary mode keeps the archive intact on Windows.
  status <- download.file(fileUrl, temp, mode = "wb")
  # download.file() reports failure through a non-zero status code.
  if (status != 0) {
    stop("download failed with status ", status, ": ", fileUrl, call. = FALSE)
  }
  unzip(temp, exdir = outDir)
}
# Source archive and destination folder for the raw data.
fileUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2Factivity.zip"
outDir <- "../inst/extdata"
# Fetch the archive and extract it into outDir.
downloadZip(fileUrl = fileUrl, outDir = outDir)

Create the RData file

More RData files may be generated during this assignment. They will be placed in the data folder.

# Locate the installed package and its raw-data folder; both paths are printed.
baseLoc <- system.file(package = "RepDataPeerAssessment1")
extPath <- file.path(baseLoc, "extdata")
baseLoc
extPath
# Read the raw CSV, save it as an RData file, and drop the in-memory copy.
activity <- read.csv(file.path(extPath, "activity.csv"))
save(activity, file = "../data/activity.RData")
rm(activity)
# Resolves only once at least one data file has been saved to this folder.
system.file(file = "data", package = "RepDataPeerAssessment1")

Basic sanity check

# Attach the package and load its activity dataset.
library(RepDataPeerAssessment1)
data("activity")
# Inspect the first rows, the dimensions, the column names, the structure
# and a per-column summary.
head(activity)
dim(activity)
names(activity)
str(activity)
summary(activity)

Convert the variable date from factor to Date

# Replace the date column with Date values and persist the updated data frame.
activity[["date"]] <- as.Date(activity[["date"]])
save(activity, file = "../data/activity.RData")
# Show the structure after the conversion.
str(activity)

What is mean total number of steps taken per day?

We will ignore the NAs in this part of the assignment.

# Flag the rows that contain no NA, then keep only those rows.
complete <- stats::complete.cases(activity)
activity.cases <- activity[which(complete), ]

Histogram of total number of steps each day

# Sum the steps of every day first: the histogram is about daily totals,
# not about the individual 5-minute counts.
steps.per.day <- aggregate(steps ~ date, data = activity.cases, FUN = sum)
# Same daily totals at two bin resolutions.
hist(steps.per.day$steps, breaks = 60,
     main = "Total number of steps per day", xlab = "Steps per day")
hist(steps.per.day$steps, breaks = 30,
     main = "Total number of steps per day", xlab = "Steps per day")

Find the mean and the median total number of steps per day

# Mean and median of the daily step totals (not of the raw 5-minute counts).
daily.totals <- aggregate(steps ~ date, data = activity.cases, FUN = sum)$steps
mean(daily.totals)
median(daily.totals)

What is the average daily activity pattern?

Time series plot (day vs avg. steps)

Make a time series plot (i.e. type = "l") of the 5-minute interval (x-axis) and the average number of steps taken, averaged across all days (y-axis).

# Average the steps of each 5-minute interval across all days, then plot the
# averages; plotting the raw rows would overlay every day on the same axis.
byInterval.steps <- aggregate(steps ~ interval, data = activity.cases,
                              FUN = mean)
plot(byInterval.steps$interval, byInterval.steps$steps, type = "l",
     xlab = "5-minute interval", ylab = "Average number of steps")



# Mean number of steps for each day.
# The grouping column is "date" (lower case); "$Date" is NULL, which makes
# aggregate() fail.
byDate.steps <- aggregate(activity.cases$steps,
                          by = list(activity.cases$date), mean)
# rename the variables to something meaningful
names(byDate.steps) <- c("Day", "mean.steps")
plot(byDate.steps$Day, byDate.steps$mean.steps, type = "l")

Find day and interval with maximum number of steps

Which 5-minute interval, on average across all the days in the dataset, contains the maximum number of steps?

# Largest single 5-minute step count and the row(s) where it occurs.
max(activity.cases$steps)
max.steps <- max(activity.cases$steps)
max.steps
index <- which(activity.cases$steps == max.steps)
whole_row <- activity.cases[index, ]
whole_row
# activity.cases$interval[which(activity.cases$steps == max(activity.cases$steps))]
# Interval with the highest number of steps on average across all days.
interval.means <- aggregate(steps ~ interval, data = activity.cases,
                            FUN = mean)
interval.means[which.max(interval.means$steps), ]
# Find max steps per day and plot.
# The grouping column is "date" (lower case); "$Date" is NULL.
byDate.steps.max <- aggregate(activity.cases$steps,
                              by = list(activity.cases$date), max)
names(byDate.steps.max) <- c("Day", "steps.max")
plot(byDate.steps.max$Day, byDate.steps.max$steps.max, type = "l")

Imputing missing values

Are there differences in activity patterns between weekdays and weekends?

Figures

The figure sizes have been customised so that you can easily put two images side-by-side.

# Example figure: plot the integers 1 to 10.
plot(1:10)

More Examples

You can write math expressions, e.g. $Y = X\beta + \epsilon$, footnotes^[A footnote here.], and tables, e.g. using knitr::kable().

# Render the first ten rows of mtcars as a table.
knitr::kable(utils::head(mtcars, n = 10))

Also a quote using >:

"He who gives up [code] safety for [code] speed deserves neither." (via)



AlfonsoRReyes/RepDataPeerAssessment1 documentation built on May 5, 2019, 4:53 a.m.