# Chunk defaults for the whole report: evaluate every chunk and echo its code.
knitr::opts_chunk$set(
  eval = TRUE,
  echo = TRUE
)

Loading required packages

# library() stops immediately when ggplot2 is not installed, whereas require()
# only returns FALSE and lets the script fail later at the first ggplot() call.
library(ggplot2)

Loading and preprocessing the data

# Load the activity data into dt.Raw, reading the CSV exactly once.
# The copy at the relative path below is used when it is present; otherwise
# the archive is fetched into ./data and the extracted CSV is read instead.
fileUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2Factivity.zip"
dl <- "../../inst/extdata/activity.csv"

if (!file.exists(dl)) {
  if (!dir.exists("./data")) {
    dir.create("./data")
  }
  # Download only when the archive is not already on disk. mode = "wb" keeps
  # the zip file intact on Windows, where the default mode can corrupt it.
  if (!file.exists("./data/Activity-monitoring-data.zip")) {
    download.file(fileUrl,
                  destfile = "./data/Activity-monitoring-data.zip",
                  mode = "wb")
  }
  # unzip() returns the path(s) of the extracted file(s).
  dl <- unzip(zipfile = "./data/Activity-monitoring-data.zip", exdir = "./data")
}

dt.Raw <- read.csv(dl)

What is the mean total number of steps taken per day?

For this part of the assignment we're ignoring missing values in the dataset

1 - Calculating the total number of steps taken per day

# Total steps per day, ignoring missing values.
# sum(..., na.rm = TRUE) over a day with no observations returns 0, which
# would invent zero-step days and distort the histogram, mean and median.
# Such days are therefore marked NA and dropped; days with at least one
# observation are summed over their observed values.
dt.Steps <- aggregate(
  list(Steps = dt.Raw$steps),
  by = list(Date = dt.Raw$date),
  FUN = function(x) if (all(is.na(x))) NA_integer_ else sum(x, na.rm = TRUE)
)
dt.Steps <- dt.Steps[!is.na(dt.Steps$Steps), ]
rownames(dt.Steps) <- NULL

dt.Steps

2 - Creating a histogram of the total number of steps taken each day

# Histogram of the daily step totals, binned in widths of 2500 from 0 to 25000.
hist(
  dt.Steps[["Steps"]],
  breaks = seq(from = 0, to = 25000, by = 2500),
  main = "Frequency of Steps per Day",
  xlab = "Steps per Day (Groups of 2500)",
  ylab = "Frequency",
  col = "lightgreen"
)

3 - Calculating the mean & median number of steps per day

# Mean and median of the daily step totals.
mean(dt.Steps[["Steps"]])
median(dt.Steps[["Steps"]])

What is the average daily activity pattern?

For this part of the assignment we're continuing to ignore missing values in the dataset

# Average steps for each 5-minute interval across all days, with missing
# values removed before averaging.
dt.DailyActivityPattern <- aggregate(
  list(Steps = dt.Raw[["steps"]]),
  by = list(Interval = dt.Raw[["interval"]]),
  FUN = mean,
  na.rm = TRUE
)

1 - Making a time series plot of the average steps taken in every 5-minute interval

# Line plot of the average steps (y) against the 5-minute interval id (x).
plot(
  x = dt.DailyActivityPattern[["Interval"]],
  y = dt.DailyActivityPattern[["Steps"]],
  type = "l",
  col = "lightgreen",
  main = "Avg. Steps per 5-minute Interval",
  xlab = "5-minute Intervals",
  ylab = "Avg. Steps Taken"
)

2 - Finding which interval, on average, has the maximum number of steps taken per day

# Print the row (interval and its average) with the highest average step count.
dt.DailyActivityPattern[
  which.max(dt.DailyActivityPattern[["Steps"]]),
]

Imputing missing values

1 - Calculating total number of missing values in the dataset

# Number of rows in the raw data whose step count is missing.
sum(is.na(dt.Raw[["steps"]]))

2/3 - Replacing all missing values with the avg. steps of that interval

# Logical mask of the rows whose step count is missing.
dt.NA <- is.na(dt.Raw[["steps"]])

# Mean steps per interval over the observed values; the result is named by
# interval id, which is what the lookup below relies on.
dt.AvgInterval <- tapply(
  X = dt.Raw[["steps"]],
  INDEX = dt.Raw[["interval"]],
  FUN = mean,
  na.rm = TRUE,
  simplify = TRUE
)

# Work on a copy of the raw data and replace each missing value with the mean
# of its interval, looked up by the interval id as a character name.
dt.Imputed <- dt.Raw
dt.Imputed$steps[dt.NA] <-
  dt.AvgInterval[as.character(dt.Imputed$interval[dt.NA])]

Recalculating total number of missing values in the dataset

# Number of missing step counts remaining in the imputed copy.
sum(is.na(dt.Imputed[["steps"]]))

4a - Creating a Histogram of total number of steps taken each day

# Total steps per day, computed from the imputed data.
dt.StepsImputed <- aggregate(
  list(Steps = dt.Imputed[["steps"]]),
  by = list(Date = dt.Imputed[["date"]]),
  FUN = sum,
  na.rm = TRUE
)

# Histogram of the imputed daily totals, binned in widths of 2500 from 0 to
# 25000.
hist(
  dt.StepsImputed[["Steps"]],
  breaks = seq(from = 0, to = 25000, by = 2500),
  main = "Frequency of Steps per Day",
  xlab = "Steps per Day (Groups of 2500)",
  ylab = "Frequency",
  col = "lightgreen"
)

4b - Calculating the mean & median number of steps per day

# Mean and median of the daily step totals after imputation.
mean(dt.StepsImputed[["Steps"]])
median(dt.StepsImputed[["Steps"]])

Are there differences in activity patterns between weekdays and weekends?

1 - Creating new variable "DateType"

# Add the day name and a Weekday/Weekend label to a copy of the imputed data.
dt.Dates <- dt.Imputed

# Day keeps the human-readable day name; weekdays() returns it in the current
# locale, so it is not used for the classification below.
dt.Dates$Day <- weekdays(as.Date(dt.Imputed$date))

# Classify by the numeric day of week (POSIXlt wday: 0 = Sunday, 6 = Saturday)
# rather than by English day names, which would label every row "Weekday"
# under a non-English locale.
dt.Dates$DateType <- ifelse(
  as.POSIXlt(as.Date(dt.Imputed$date))$wday %in% c(0L, 6L),
  "Weekend",
  "Weekday"
)

2 - Creating a panel plot with average number of steps taken per interval on Weekdays and Weekends

# Average steps per interval, computed separately for each DateType.
dt.DatesAgg <- aggregate(
  steps ~ interval + DateType,
  data = dt.Dates,
  FUN = mean
)

# One panel per DateType (stacked as rows), with a line of average steps
# against the interval id in each.
ggplot(dt.DatesAgg, aes(x = interval, y = steps, colour = DateType)) +
  geom_line() +
  facet_grid(DateType ~ .) +
  labs(x = "5-minute Intervals", y = "Avg. Steps Taken")


AlfonsoRReyes/RepDataPeerAssessment1 documentation built on May 5, 2019, 4:53 a.m.