library(RepDataPeerAssessment1)
library(rprojroot)

# Resolve the package's standard directories relative to the package root.
project.R <- find_package_root_file("R")
project.extdata <- find_package_root_file("inst/extdata")
project.data <- find_package_root_file("data")

# Show the resolved paths (auto-printed at top level).
project.data
project.R
project.extdata

# Load the `activity` dataset and pull out the step counts for a first look.
data(list = "activity")
steps <- activity$steps
summary(steps)

We have `r sum(is.na(steps))` NA values.

Imputation with na.interp

We now have `r sum(is.na(steps.imp))` NA values.

# Standard error of the mean step count before imputation. NAs are excluded
# from both the standard deviation and the sample size.
summary(activity$steps)
se.0 <- sd(activity$steps, na.rm = TRUE) / sqrt(sum(!is.na(activity$steps)))
cat("SE(before imp.) = ", se.0, "\n")

# Fill the NA values by interpolation. na.interp() is called through the
# forecast namespace because library(forecast) is only attached further down
# in this script, so the bare name is not guaranteed to be on the search path.
steps.imp.interp <- forecast::na.interp(steps)

# Same summary and standard error after imputation, for comparison.
summary(steps.imp.interp)
se.1 <- sd(steps.imp.interp, na.rm = TRUE) /
  sqrt(sum(!is.na(steps.imp.interp)))
cat("SE(after imp.) = ", se.1, "\n")

Imputation with the mean

# Mean imputation: every missing step count is replaced by the mean of the
# observed (non-missing) values.
cat("Before imputation\n")
steps.imp.mean <- activity$steps
indNA <- which(is.na(steps.imp.mean))  # positions of the missing values
cat(sprintf("# rows with NA: %d \n", length(indNA)))

# One copy of the observed mean for each missing position.
predicteds <- rep(mean(steps.imp.mean, na.rm = TRUE), times = length(indNA))
summary(predicteds)

cat("\nAfter imputation: mean\n")
# Substitute the means at the missing positions, then recount the NAs to
# confirm that none are left.
steps.imp.mean <- replace(steps.imp.mean, indNA, predicteds)
indNA <- which(is.na(steps.imp.mean))
cat(sprintf("# rows with NA: %d \n", length(indNA)))
summary(steps.imp.mean)

# Standard error of the mean for the mean-imputed series.
se.2 <- sd(steps.imp.mean, na.rm = TRUE) /
  sqrt(length(steps.imp.mean) - sum(is.na(steps.imp.mean)))

cat("\nSE(steps.imp.mean) = ", se.2, "\n")
library(DMwR)

# Positions of the missing values, and the observed (non-missing) step counts,
# in the raw data.
indNA <- which(is.na(activity$steps))
before <- activity$steps[!is.na(activity$steps)]

# Summaries of the two imputed series, one after the other.
summary(steps.imp.interp)
summary(steps.imp.mean)

# Regression error statistics with the mean-imputed series passed as `trues`
# and the interpolated series as `preds`, i.e. a comparison of the two
# imputation methods against each other.
regr.eval(trues = steps.imp.mean, preds = steps.imp.interp)

Using interpolation

library(forecast)
library(zoo)

# Toy example: 16 observations at 6-hour steps (four days, four readings per
# day). Each value is a per-day level (10, 20, 30, 40) plus a time-of-day
# offset (3, 1, 2, 4).
start <- as.POSIXct("2010-01-01")
freq <- as.difftime(6, units = "hours")
dayvals <- (1:4) * 10
timevals <- c(3, 1, 2, 4)
index <- seq(from = start, by = freq, length.out = 16)
obs <- rep(dayvals, each = 4) + rep(timevals, times = 4)
myzoo <- zoo(obs, index)

# Remove the 10th observation (true value 31) so the fills can be compared.
myzoo[10] <- NA
cat(myzoo[10])

# Fill 1: last observation carried forward.
myzoo.fixed <- na.locf(myzoo)
myzoo.fixed[10]

# Fill 2: interpolation; the interpolated value is written back into the series.
fit <- na.interp(myzoo)
fit[10]  # 32.5, vs. 31.0 actual and 32.0 from Rob Hyndman's answer
myzoo[10] <- fit[10]

# Smooth the filled series with a structural time-series model, treating the
# four readings per day as one seasonal cycle. `frequency` is spelled out
# rather than relying on partial matching of `f`.
x <- ts(myzoo, frequency = 4)
# Sum every smoothed component except the second column
# (presumably the slope term of the fitted model -- confirm against StructTS).
fit <- ts(rowSums(tsSmooth(StructTS(x))[, -2]))
tsp(fit) <- tsp(x)  # give the smoothed series the same time axis as x
plot(x)
lines(fit, col = 2)
cat(myzoo[10])


AlfonsoRReyes/RepDataPeerAssessment1 documentation built on May 5, 2019, 4:53 a.m.