library(knitr)
library(learnr)
knitr::opts_chunk$set(echo = TRUE, exercise = FALSE)
We now consider a model of the form
$$Y_i = \beta_0 + \beta_1 x_i + \varepsilon_i, \qquad \varepsilon_i \overset{iid}{\sim} N(0, \sigma^2).$$
We load the libraries, the data and set the variables.
library(kknn)
(record <- moxier::record)
response <- record[, "m100"]
regressor <- record[, "m200"]
We make a first plot.
plot(regressor, response, asp = 1)
grid()
abline(0, 1, lty = 3)
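The sample correlation quantifies the linear association visible in the plot (a quick check before fitting, not part of the original flow):
cor(regressor, response)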
We fit a linear model.
fm <- lm(response ~ regressor)
print(summary(fm))
We plot the results.
plot(regressor, response, asp = 1)
grid()
abline(0, 1, lty = 3)
abline(coefficients(fm), col = "red")
points(regressor, fitted(fm), col = "red", pch = 16)
We create a qq-plot of the residuals,
qqnorm(residuals(fm))
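(To make departures from normality easier to judge, base R's qqline can add the reference line through the first and third theoretical quartiles; a small optional addition.)
qqline(residuals(fm))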
perform a Normality test of the residuals,
shapiro.test(residuals(fm))
print the estimates of the regression coefficients,
coefficients(fm)
compute CI of the regression coefficients,
confint(fm, level = 0.95)
print the estimate of the error variance
s2 <- sum(residuals(fm)^2) / fm$df.residual
sqrt(s2)
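(As a check, this matches the residual standard error reported by summary():)
summary(fm)$sigma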
plot the residuals vs regressor
plot(regressor, residuals(fm))
abline(h = 0)
Compute a CI for the mean $m_{100}$ record of countries that have $m_{200}$ record equal to 23.
First, we build a new data frame,
(Z0 <- data.frame(regressor = 23))
then we compute the CI
(CI <- predict(fm, Z0, interval = "confidence"))
and finally the PI.
(PI <- predict(fm, Z0, interval = "prediction", level = 0.95))
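As an optional by-hand check, here is a minimal sketch using the standard simple-linear-regression interval formulas; it should reproduce the predict() output above.
x0   <- 23
n    <- length(response)
s    <- sqrt(sum(residuals(fm)^2) / fm$df.residual)  # residual standard error
xbar <- mean(regressor)
Sxx  <- sum((regressor - xbar)^2)
y0   <- sum(coefficients(fm) * c(1, x0))             # point prediction at x0
tq   <- qt(0.975, fm$df.residual)
y0 + c(-1, 1) * tq * s * sqrt(1 / n + (x0 - xbar)^2 / Sxx)      # CI for the mean response
y0 + c(-1, 1) * tq * s * sqrt(1 + 1 / n + (x0 - xbar)^2 / Sxx)  # PI for a new observation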
We build a new data frame over a grid of regressor values,
(Z0 <- data.frame(regressor = seq(21, 28, by = 0.1)))
we compute the CIs and plot them.
(CI <- predict(fm, Z0, interval = "confidence"))
plot(regressor, response, asp = 1)
lines(Z0[, 1], CI[, "fit"])
lines(Z0[, 1], CI[, "lwr"], lty = 4)
lines(Z0[, 1], CI[, "upr"], lty = 4)
Finally, we compute the PIs and plot them.
PI <- predict(fm, Z0, interval = "prediction", level = 0.95)
plot(regressor, response, asp = 1)
lines(Z0[, 1], CI[, "fit"])
lines(Z0[, 1], CI[, "lwr"], lty = 4)
lines(Z0[, 1], CI[, "upr"], lty = 4)
lines(Z0[, 1], PI[, "fit"])
lines(Z0[, 1], PI[, "lwr"], lty = 2)
lines(Z0[, 1], PI[, "upr"], lty = 2)
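A legend keeps the two bands apart visually (a small addition using the line types from the code above; the placement is arbitrary):
legend("topleft", legend = c("fit", "95% CI", "95% PI"), lty = c(1, 4, 2))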
We add all the disciplines.
We plot the data,
pairs(record)
then we fit the linear model.
fmall <- lm(m100 ~ ., record)
print(summary(fmall))
Compare the models!
print(summary(fm))
print(summary(fmall))
We fit the linear model, but we remove the $m_{200}$ record from the regressors.
fm2red <- lm(m100 ~ ., record[, -2])
print(summary(fm2red))
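Optionally, the comparison can be made formal: fm2red is nested in fmall (it drops $m_{200}$, which the code above assumes is column 2 of record), so an F-test applies.
anova(fm2red, fmall)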
We also fit a linear model to predict another discipline record.
fmallother <- lm(Marathon ~ ., record)
print(summary(fmallother))
We plot the data,
pairs(record)
set the variables,
response <- record[, "m100"]
regressor <- record[, "m200"]
plot them, and add the linear regression line for comparison.
plot(regressor, response)
grid()
abline(0, 1, lty = 3)
fm <- lm(response ~ regressor)
abline(coefficients(fm), col = "red")
We fit a kNN regression (rectangular kernel, $k = 15$) over a grid of regressor values.
Z0 <- data.frame(regressor = seq(min(regressor), max(regressor), length = 100))
predicted.response <- kknn(response ~ regressor,
  train = data.frame(regressor, response),
  test = Z0, k = 15, kernel = "rectangular"
)
We plot the knn prediction line.
plot(regressor, response)
grid()
abline(0, 1, lty = 3)
abline(coefficients(fm), col = "red")
lines(Z0[, "regressor"], predicted.response$fitted.values, col = "blue")
To select the value of $k$, we rely on leave-one-out (LOO) cross-validation.
train.cv <- train.kknn(response ~ regressor,
  data = data.frame(regressor, response),
  kmax = 40, scale = FALSE, kernel = "rectangular"
)
We compare the CV errors with the response variance.
plot(train.cv)
abline(h = var(response))
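The selected $k$ can be read from the fitted object:
train.cv$best.parameters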
Try changing the regressor and compare the results!
We import the data and set the variables.
exchange <- moxier::`cambi-CAD-USD`
response <- exchange[, "CAD"]
regressor <- exchange[, "USD"]
We plot the data.
plot(regressor, response, asp = 1)
grid()
abline(0, 1, lty = 3)
Fit a linear model
fm <- lm(response ~ regressor)
summary(fm)
Plot the regression line and the fitted values
plot(regressor, response, asp = 1)
grid()
abline(0, 1, lty = 3)
abline(coefficients(fm), col = "red")
points(regressor, fitted(fm), col = "red", pch = 16)
Create a qq-plot of the residuals
qqnorm(residuals(fm))
Perform a Normality test of the residuals
shapiro.test(residuals(fm))
Compute CI of the regression coefficients
confint(fm, level = 0.95)
Print the estimate of the error variance
s2 <- sum(residuals(fm)^2) / fm$df.residual
sqrt(s2)
Plot the residuals vs regressor
plot(regressor, residuals(fm))
abline(h = 0)
We switch to daily log returns, $r_t = \log(P_t / P_{t-1})$.
logreturn.CAD <- log(exchange[-1, "CAD"] / exchange[-length(exchange[, "CAD"]), "CAD"])
logreturn.USD <- log(exchange[-1, "USD"] / exchange[-length(exchange[, "USD"]), "USD"])
response <- logreturn.CAD
regressor <- logreturn.USD
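Equivalently, log returns are first differences of the log series; as a sanity check, this should reproduce the vector computed above exactly.
logreturn.CAD2 <- diff(log(exchange[, "CAD"]))
all.equal(logreturn.CAD, logreturn.CAD2)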
We plot the data,
plot(regressor, response, asp = 1)
grid()
abline(0, 1, lty = 3)
fit a linear model,
fmlog <- lm(response ~ regressor)
summary(fmlog)
plot the regression line and the fitted values,
plot(regressor, response, asp = 1)
grid()
abline(0, 1, lty = 3)
abline(coefficients(fmlog), col = "red")
points(regressor, fitted(fmlog), col = "red", pch = 16)
create a qq-plot of the residuals,
qqnorm(residuals(fmlog))
perform a Normality test of the residuals,
shapiro.test(residuals(fmlog))
compute the CI of the regression coefficients,
confint(fmlog, level = 0.95)
print the estimate of the error variance,
s2 <- sum(residuals(fmlog)^2) / fmlog$df.residual
sqrt(s2)
plot the residuals vs regressor.
plot(regressor, residuals(fmlog))
abline(h = 0)
We compare the results and plot the outcome.
CAD <- exchange[-1, "CAD"]
USD <- exchange[-1, "USD"]
CADfit <- fitted(fm)[-1]  # level-model fits, aligned with CAD (first day dropped)
CADfitlog <- exchange[-length(exchange[, "CAD"]), "CAD"] * exp(fitted(fmlog))  # previous-day CAD times exp(fitted log return)
plot(USD, CAD)
points(USD, CADfit, pch = 16, col = "red")
points(USD, CADfitlog, pch = 16, col = "green")
boxplot(cbind(CAD - CADfit, CAD - CADfitlog), main = "Prediction errors")
We set the variables, plot the data and add the linear regression line as a comparison.
response <- CAD
regressor <- USD
plot(regressor, response)
grid()
abline(0, 1, lty = 3)
fm <- lm(response ~ regressor)
abline(coefficients(fm), col = "red")
We fit a kNN regression (rectangular kernel, $k = 16$)
Z0 <- data.frame(regressor = seq(min(regressor), max(regressor), length = 100))
predicted.response <- kknn(response ~ regressor,
  train = data.frame(regressor, response),
  test = Z0, k = 16, kernel = "rectangular"
)
and plot.
plot(regressor, response)
grid()
abline(0, 1, lty = 3)
abline(coefficients(fm), col = "red")
lines(Z0[, "regressor"], predicted.response$fitted.values, col = "blue")
We set $k$ via LOO cross-validation.
train.cv <- train.kknn(response ~ regressor,
  data = data.frame(regressor, response),
  kmax = 40, scale = FALSE, kernel = "rectangular"
)
And we compare CV errors with the response variance.
plot(train.cv)
abline(h = var(response))
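As before, the selected $k$ is stored in the fitted object (plausibly the source of the $k = 16$ used above):
train.cv$best.parameters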
We compare predictions.
predicted.response.withinsample <- kknn(response ~ regressor,
  train = data.frame(regressor, response),
  test = data.frame(regressor), k = 16, kernel = "rectangular"
)
boxplot(
  cbind(CAD - CADfit, CAD - CADfitlog, CAD - predicted.response.withinsample$fitted.values),
  main = "Prediction errors"
)
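As a compact numeric counterpart to the boxplots, the in-sample root-mean-square error of each predictor:
c(
  linear = sqrt(mean((CAD - CADfit)^2)),
  log    = sqrt(mean((CAD - CADfitlog)^2)),
  knn    = sqrt(mean((CAD - predicted.response.withinsample$fitted.values)^2))
)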