# tests/testthat/test_LinearRegression.R In UBC-MDS/Karl:

```# LinearRegression_test.R
# March 2018
#
# This script test the methods from the LinearRegression.R script.

## Packages
require(matlib)
require(MASS)

### Test simple linear regression for a continuous feature
# Generate small data to test our function
set.seed(4)
X <- data.frame('X1' = rnorm(10))
y <- X\$X1 + rnorm(10)

# Fit a linear regression on the data
model <- LinearRegression(X, y)

# True value of the coefficients
beta <- cov(X\$X1, y)/var(X\$X1)
alpha <- mean(y) - beta*mean(X\$X1)
fit <- alpha + beta*X\$X1
res <- y - fit

test_that("Testing LinearRegression for one continuous feature", {
expect_match(typeof(model), 'list')
expect_equal(names(model), c('weights', 'fitted', 'residuals'))

expect_equal(model\$weights['X1',][[1]], beta, tolerance = 1e-5)
expect_equal(model\$weights['intercept',][[1]], alpha, tolerance = 1e-5)
expect_equal(c(model\$fitted), fit, tolerance = 1e-5)
expect_equal(c(model\$residuals), res, tolerance = 1e-5)
})

### Test simple linear regression with duplicate observation
# Create a duplicate observation
X\$X1[5] <- X\$X1[1]

# Fit a linear regression on the data
model <- LinearRegression(X, y)

# True value of the coefficients
beta <- cov(X\$X1, y)/var(X\$X1)
alpha <- mean(y) - beta*mean(X\$X1)
fit <- alpha + beta*X\$X1
res <- y - fit

test_that("Testing LinearRegression for duplicate observations", {
expect_match(typeof(model), 'list')
expect_equal(names(model), c('weights', 'fitted', 'residuals'))

expect_equal(length(model\$weights), 2)
expect_equal(length(model\$fitted), 10)
expect_equal(length(model\$residuals), 10)

expect_equal(model\$weights['X1',][[1]], beta, tolerance = 1e-5)
expect_equal(model\$weights['intercept',][[1]], alpha, tolerance = 1e-5)
expect_equal(c(model\$fitted), fit, tolerance = 1e-5)
expect_equal(c(model\$residuals), res, tolerance = 1e-5)
})

### Test multi-linear regression for continuous features
## also test that the non numeric feature is unused
# Generate small data to test our function
set.seed(4)
X <- data.frame('X1' = rnorm(10), 'X2' = rnorm(10), 'X3' = rnorm(10),
'char' = rep('a', 10))
X\$char <- as.character(X\$char)
y <- X\$X1 + X\$X2 + X\$X3 + rnorm(10)

# Fit a linear regression on the data
model <- LinearRegression(X, y)

# True values
cols <- (sapply(X, typeof) %in% c('double', 'integer', 'numeric'))
X_mat <- X %>% dplyr::select(names(X)[cols])
X_mat <- cbind("intercept"=1, X_mat)
X_mat <- as.matrix(X_mat)
beta <- ginv(t(X_mat)%*%X_mat)%*%t(X_mat)%*%y
fit <- X_mat%*%beta
res <- y - fit

test_that("Testing LinearRegression for multi continuous features", {
expect_match(typeof(model), 'list')
expect_equal(names(model), c('weights', 'fitted', 'residuals'))

expect_equal(length(model\$weights), 4)
expect_equal(length(model\$fitted), 10)
expect_equal(length(model\$residuals), 10)

expect_equal(c(model\$weights), c(beta), tolerance = 1e-5)
expect_equal(c(model\$fitted), c(fit), tolerance = 1e-5)
expect_equal(c(model\$residuals), c(res), tolerance = 1e-5)
})

### Test multi-linear regression with missing values
# Generate small data to test our function
set.seed(4)
X <- data.frame('X1' = rnorm(10), 'X2' = rnorm(10), 'X3' = rnorm(10))
y <- X\$X1 + X\$X2 + X\$X3 + rnorm(10)

X\$X1[3] <- NaN
X\$X3[5] <- NaN

test_that("Missing values should return an error", {
expect_error(LinearRegression(X, y))
})

### Test error when there is no numeric feature
# Generate small data to test our function
X <- data.frame('char' = rep('a', 10))

test_that("No numeric feature should return an error", {
expect_error(LinearRegression(X, y))
})
```
UBC-MDS/Karl documentation built on May 22, 2019, 1:53 p.m.