# Attach the package that provides the `activity` data set, load the data set
# into the workspace, and preview its first rows.
library("RepDataPeerAssessment1")
data("activity")
head(activity, n = 6L)

Reshape data frame to show intervals as columns. Days are shown as rows and steps are the values under the interval columns.

library(reshape)

# Treat the interval identifier as a categorical label so that each distinct
# interval becomes its own column when the data are cast to wide form.
activity$interval <- as.factor(activity$interval)

# Rename the step-count column to "value", the measurement column name that
# cast() picks up by default.
# NOTE(review): assumes the columns arrive in the order steps, date, interval.
names(activity) <- c("value", "date", "interval")

# Wide layout: one row per date, one column per interval.
actbycol <- cast(activity, date ~ interval)
# str(actbycol$"820")
# write.csv(actbycol, "intervals.csv")
head(actbycol)
# The spreadsheet viewer is utils::View() (capital V). Only open it in an
# interactive session so that non-interactive rendering does not fail.
if (interactive()) {
  View(actbycol)
}
# find unique intervals
unique.intervals <- unique(activity$interval)
length(unique.intervals)
# Count the interval columns (everything after the date column) that contain
# at least one NA. If the count equals the number of interval columns, every
# interval is missing on some day, which is consistent with the NAs occurring
# as whole missing days (entire rows).
interval_cols <- 2:ncol(actbycol)
sum(vapply(actbycol[, interval_cols], function(x) any(is.na(x)), logical(1)))

Tests

# Long-format check: pull every record of one day and total its steps.
# Original author's note: this subset is 288 x 3.
target_day <- "2012-11-29"
act_20121129 <- activity[activity$date == target_day, ]
sum(act_20121129$value)
dim(act_20121129)
head(act_20121129)
# Wide-format check: pull the same day from the cast table and total its steps.
# Original author's note: this subset is 1 x 289, the first column is the date.
bycol_20121129 <- actbycol[actbycol$date == target_day, ]
total_steps <- rowSums(bycol_20121129[, seq(2, 289)]) # explicit column numbers
total_steps
dim(bycol_20121129)
head(bycol_20121129)

Find the rows that are all NA

# Keep only the rows (days) of the wide table that have no missing value.
# Original author's note: the result is a 53 x 289 data frame.
mask <- complete.cases(actbycol)
actbycol.complete <- actbycol[mask, ]
dim(actbycol.complete)

# Column means over the complete days; column 1 (the date) is skipped, which
# leaves 288 means, one per interval column.
colMeans(actbycol.complete[, seq(2, 289)])
# group by date and interval
# The step-count column may already have been renamed from "steps" to "value"
# earlier in the session. Work on a copy that restores the "steps" name so the
# calls below find the column either way; `activity` itself is left untouched.
activity_steps <- activity
names(activity_steps)[names(activity_steps) == "value"] <- "steps"
# Largest step count for every (date, interval) combination.
byDateIntv <- aggregate(
  activity_steps$steps,
  by = list(activity_steps$date, activity_steps$interval),
  FUN = max
)
library(reshape)
# Long form keyed by date and steps; the remaining column(s) become
# variable/value pairs.
molten <- melt(activity_steps, id = c("date", "steps"))
head(molten)
# library() stops with an error when reshape2 is missing; require() would only
# return FALSE and let the dcast() call below fail with a less helpful message.
library(reshape2)

# Small toy data set in long form: one row per (Marker, Genotype) pair.
indata <- data.frame(
  Marker = rep(c("M1", "M2", "M3", "M4"), 3),
  Genotype = rep(c("G1", "G2", "G3"), each = 4),
  value = c(
    "AA", "TT", "GG", "CC", "AA", "GG",
    "AA", "TT", "GG", "CC", "AA", "GG"
  )
)

# Wide form: one row per Marker, one column per Genotype. The measurement
# column is named explicitly instead of being guessed.
outdata <- dcast(indata, Marker ~ Genotype, value.var = "value")
head(outdata)

Removed from the assignment: not relevant to the question.

Time series plot (interval vs. avg. steps)

A line plot (i.e. type = "l") of the 5-minute interval (x-axis) against the average number of steps taken, averaged across all days (y-axis).

# added to reproduce the chunk below
library(RepDataPeerAssessment1)
data("activity")
# The plot needs `activity.cases`, which this chunk never creates. If it is
# not already available, build it from the rows of `activity` that have no
# missing values.
# NOTE(review): this definition is inferred from the object's name - confirm.
if (!exists("activity.cases")) {
  activity.cases <- activity[complete.cases(activity), ]
}
# Line plot of the step counts against the interval identifier.
plot(activity.cases$interval, activity.cases$steps, type = "l")

maximum number of steps by date

Removed because it is not relevant:

# activity.cases$interval[which(activity.cases$steps == max(activity.cases$steps))]
# For every date, take the largest step count recorded in any interval, label
# the two result columns, and draw the daily maxima as a line.
byDate.steps.max <- setNames(
  aggregate(activity.cases$steps, by = list(activity.cases$date), FUN = max),
  c("Day", "steps.max")
)
plot(byDate.steps.max$Day, byDate.steps.max$steps.max, type = "l")


AlfonsoRReyes/RepDataPeerAssessment1 documentation built on May 5, 2019, 4:53 a.m.