Loading required packages


Loading and preprocessing the data

# Load the activity data set into dt.Raw.
# NOTE(review): fileUrl is an empty string here, so the download step has no
# source to fetch from as written — confirm the intended URL.
fileUrl = ""
# NOTE(review): destfile is a directory-style path ("./data/") rather than a
# file name — confirm the intended archive path.
download.file(fileUrl, destfile = "./data/")

# unzip() returns the path(s) of the extracted file(s); that value is passed
# straight to read.csv().
dl = unzip(zipfile = "./data/", exdir = "./data")
dt.Raw = read.csv(dl)
# dl is then re-pointed at a CSV under inst/extdata and read again, so this
# second read is the one that determines the final contents of dt.Raw.
# NOTE(review): presumably only one of the two load paths is meant to run —
# verify which.
dl <- "../../inst/extdata/activity.csv"
dt.Raw = read.csv(dl)

What is the mean total number of steps taken per day?

For this part of the assignment we're ignoring missing values in the dataset

1 - Calculating the total number of steps taken per day

# Total steps per date: one row per distinct dt.Raw$date, with columns
# Date and Steps. NA step counts are dropped before summing, so a date
# whose readings are all NA gets a total of 0.
dt.Steps <- aggregate(
  x = list(Steps = dt.Raw$steps),
  by = list(Date = dt.Raw$date),
  FUN = function(daily) sum(daily, na.rm = TRUE)
)


2 - Creating a Histogram of total number of steps taken each day

# Histogram of the daily step totals, binned in 2500-step groups
# covering 0 to 25000.
hist(
  dt.Steps$Steps,
  main = "Frequency of Steps per Day",
  xlab = "Steps per Day (Groups of 2500)",
  ylab = "Frequency",
  col = "lightgreen",
  breaks = seq(from = 0, to = 25000, by = 2500)
)

3 - Calculating the mean & median number of steps per day


What is the average daily activity pattern?

For this part of the assignment we're continuing to ignore missing values in the dataset

# Mean steps per interval across all dates: one row per distinct
# dt.Raw$interval, with columns Interval and Steps. NA step counts are
# excluded from each mean.
dt.DailyActivityPattern <- aggregate(
  x = list(Steps = dt.Raw$steps),
  by = list(Interval = dt.Raw$interval),
  FUN = function(readings) mean(readings, na.rm = TRUE)
)

1 - Making a time series plot of the average steps taken in every 5-minute interval

# Line plot of the per-interval mean steps against the interval identifier.
plot(
  x = dt.DailyActivityPattern$Interval,
  y = dt.DailyActivityPattern$Steps,
  type = "l",
  main = "Avg. Steps per 5-minute Interval",
  xlab = "5-minute Intervals",
  ylab = "Avg. Steps Taken",
  col = "lightgreen"
)

2 - Finding which interval, on average, has the maximum number of steps taken per day


Imputing missing values

1 - Calculating total number of missing values in the dataset


2/3 - Replacing all missing values with the avg. steps of that interval

# Impute missing step counts with the mean of the same interval.
#
# dt.NA           logical mask, TRUE where dt.Raw$steps is NA.
# dt.AvgInterval  per-interval mean of steps (NAs excluded), named by the
#                 interval value.
# dt.Imputed      copy of dt.Raw in which each NA step count is replaced by
#                 the mean for that row's interval.
dt.NA <- is.na(dt.Raw$steps)

dt.AvgInterval <- tapply(dt.Raw$steps,
                         dt.Raw$interval,
                         mean,
                         na.rm = TRUE,
                         simplify = TRUE)
dt.Imputed <- dt.Raw
# tapply() names its result by the grouping value, so the means are looked up
# by the character form of each missing row's interval.
dt.Imputed$steps[dt.NA] <- dt.AvgInterval[as.character(dt.Imputed$interval[dt.NA])]

Recalculating total number of missing values in the dataset


4a - Creating a Histogram of total number of steps taken each day

# Total steps per date from the imputed data: one row per distinct
# dt.Imputed$date, with columns Date and Steps.
dt.StepsImputed <- aggregate(list(Steps = dt.Imputed$steps),
                             by = list(Date = dt.Imputed$date),
                             FUN = sum, na.rm = TRUE)

# Histogram of the imputed daily totals, using the same 2500-step bins,
# colour and labels as the histogram of the raw totals.
hist(dt.StepsImputed$Steps,
     breaks = seq(0, 25000, by = 2500),
     col = "lightgreen",
     main = "Frequency of Steps per Day",
     xlab = "Steps per Day (Groups of 2500)", ylab = "Frequency")

4b - Calculating the mean & median number of steps per day


Are there differences in activity patterns between weekdays and weekends?

1 - Creating new variable "DateType"

# Copy the imputed data and add two columns:
#   Day       weekday name of each date (text depends on the session locale)
#   DateType  "Weekend" for Saturday/Sunday, otherwise "Weekday"
dt.Dates <- dt.Imputed
dt.Dates$Day <- weekdays(as.POSIXlt(dt.Imputed$date))
# Classify by the numeric day-of-week (POSIXlt$wday: 0 = Sunday, 6 = Saturday)
# rather than by the weekday name, so the result does not depend on the
# locale's day names.
dt.Dates$DateType <- ifelse(as.POSIXlt(dt.Imputed$date)$wday %in% c(0L, 6L),
                            "Weekend", "Weekday")

2 - Creating a panel plot with average number of steps taken per interval on Weekdays and Weekends

# Mean steps for every interval / DateType combination.
dt.DatesAgg <- aggregate(
  steps ~ interval + DateType,
  data = dt.Dates,
  FUN = mean
)

# Panel plot: one row of panels per DateType, each showing mean steps
# against interval as a line coloured by DateType.
ggplot(
  data = dt.DatesAgg,
  mapping = aes(x = interval, y = steps, colour = DateType)
) +
  geom_line() +
  facet_grid(DateType ~ .) +
  labs(x = "5-minute Intervals", y = "Avg. Steps Taken")

