lm.hw4: Linear Regression

test_that("multiplication works", {
#test case 1: simulation data - small sample size
  n = 10; p = 3; q = 2;
  x = matrix(rnorm(n * p), n, p) # no intercept
  y1 = rnorm(n)
  y2 = matrix(rnorm(n * q), n, q)

  # invalid cases
  # input response variable as a n*p matrix (p > 1)
  z_error1 = lm_fit(x = x, y = y2)
  expect_error(lm_summary(z_error1))
  # use methods other than "qr"
  z_error2 = lm_fit(x = x, y = y1, method = "inv")
  expect_error(lm_summary(z_error2), "only support 'qr' method")

  # input response variable as a vector or a n*1 matrix
  z1 = lm_fit(x = x, y = y1) # no intercept, using method "qr"
  z2 = lm_fit(x = x, y = y1, add.intercept = TRUE) # add intercept, using method "qr"

  test1 = stats::lm(y1 ~ 0 + x)
  test2 = stats::lm(y1 ~ x)

  result1 = lm_summary(z1, correlation = TRUE)
  result2 = lm_summary(z2, correlation =TRUE)
  test.s1 = summary.lm(test1, correlation = TRUE)
  test.s2 = summary.lm(test2, correlation = TRUE)

  expect_lt(sum(abs(result1$coefficients - test.s1$coefficients)), 1e-10)
  expect_lt(sum(abs(result1$residuals - test.s1$residuals)), 1e-10)
  expect_lt(abs(result1$sigma - test.s1$sigma), 1e-10)
  expect_equal(result1$df, test.s1$df)
  expect_lt(abs(result1$r.squared - test.s1$r.squared), 1e-10)
  expect_lt(abs(result1$adj.r.squared - test.s1$adj.r.squared), 1e-10)
  expect_lt(abs(result1$fstatistic[1] - test.s1$fstatistic[1]), 1e-10)
  expect_equal(result1$fstatistic[2:3], test.s1$fstatistic[2:3])
  expect_lt(sum(abs(result1$cov.unscaled - test.s1$cov.unscaled)), 1e-10)
  expect_lt(sum(abs(result1$correlation - test.s1$correlation)), 1e-10)

  expect_lt(sum(abs(result2$coefficients - test.s2$coefficients)), 1e-10)
  expect_lt(sum(abs(result2$residuals - test.s2$residuals)), 1e-10)
  expect_lt(abs(result2$sigma - test.s2$sigma), 1e-10)
  expect_equal(result2$df, test.s2$df)
  expect_lt(abs(result2$r.squared - test.s2$r.squared), 1e-10)
  expect_lt(abs(result2$adj.r.squared - test.s2$adj.r.squared), 1e-10)
  expect_lt(abs(result2$fstatistic[1] - test.s2$fstatistic[1]), 1e-10)
  expect_equal(result2$fstatistic[2:3], test.s2$fstatistic[2:3])
  expect_lt(sum(abs(result2$cov.unscaled - test.s2$cov.unscaled)), 1e-10)
  expect_lt(sum(abs(result2$correlation - test.s2$correlation)), 1e-10)

#test case 2: simulation data - big sample size
  n = 10000; p = 100; q = 2;
  x = matrix(rnorm(n * p), n, p) # no intercept
  y1 = rnorm(n)
  y2 = matrix(rnorm(n * q), n, q)

  # invalid cases
  # input response variable as a n*p matrix (p > 1)
  z_error1 = lm_fit(x = x, y = y2)
  expect_error(lm_summary(z_error1))
  # use methods other than "qr"
  z_error2 = lm_fit(x = x, y = y1, method = "inv")
  expect_error(lm_summary(z_error2), "only support 'qr' method")

  # input response variable as a vector or a n*1 matrix
  z1 = lm_fit(x = x, y = y1) # no intercept, using method "qr"
  z2 = lm_fit(x = x, y = y1, add.intercept = TRUE) # add intercept, using method "qr"

  test1 = stats::lm(y1 ~ 0 + x)
  test2 = stats::lm(y1 ~ x)

  result1 = lm_summary(z1, correlation = TRUE)
  result2 = lm_summary(z2, correlation =TRUE)
  test.s1 = summary.lm(test1, correlation = TRUE)
  test.s2 = summary.lm(test2, correlation = TRUE)

  expect_lt(sum(abs(result1$coefficients - test.s1$coefficients)), 1e-10)
  expect_lt(sum(abs(result1$residuals - test.s1$residuals)), 1e-10)
  expect_lt(abs(result1$sigma - test.s1$sigma), 1e-10)
  expect_equal(result1$df, test.s1$df)
  expect_lt(abs(result1$r.squared - test.s1$r.squared), 1e-10)
  expect_lt(abs(result1$adj.r.squared - test.s1$adj.r.squared), 1e-10)
  expect_lt(abs(result1$fstatistic[1] - test.s1$fstatistic[1]), 1e-10)
  expect_equal(result1$fstatistic[2:3], test.s1$fstatistic[2:3])
  expect_lt(sum(abs(result1$cov.unscaled - test.s1$cov.unscaled)), 1e-10)
  expect_lt(sum(abs(result1$correlation - test.s1$correlation)), 1e-10)

  expect_lt(sum(abs(result2$coefficients - test.s2$coefficients)), 1e-10)
  expect_lt(sum(abs(result2$residuals - test.s2$residuals)), 1e-10)
  expect_lt(abs(result2$sigma - test.s2$sigma), 1e-10)
  expect_equal(result2$df, test.s2$df)
  expect_lt(abs(result2$r.squared - test.s2$r.squared), 1e-10)
  expect_lt(abs(result2$adj.r.squared - test.s2$adj.r.squared), 1e-10)
  expect_lt(abs(result2$fstatistic[1] - test.s2$fstatistic[1]), 1e-10)
  expect_equal(result2$fstatistic[2:3], test.s2$fstatistic[2:3])
  expect_lt(sum(abs(result2$cov.unscaled - test.s2$cov.unscaled)), 1e-10)
  expect_lt(sum(abs(result2$correlation - test.s2$correlation)), 1e-10)

#test case 3: real data
  x = cbind(mtcars$hp, mtcars$wt, mtcars$hp * mtcars$wt)
  y = mtcars$mpg
  z = lm_fit(x, y, add.intercept = TRUE)
  test = lm(mpg ~ hp + wt + hp:wt, data = mtcars)

  result = lm_summary(z, correlation = TRUE)
  test.s = summary.lm(test, correlation = TRUE)

  expect_lt(sum(abs(result$coefficients - test.s$coefficients)), 1e-10)
  expect_lt(sum(abs(result$residuals - test.s$residuals)), 1e-10)
  expect_lt(abs(result$sigma - test.s$sigma), 1e-10)
  expect_equal(result$df, test.s$df)
  expect_lt(abs(result$r.squared - test.s$r.squared), 1e-10)
  expect_lt(abs(result$adj.r.squared - test.s$adj.r.squared), 1e-10)
  expect_lt(abs(result$fstatistic[1] - test.s$fstatistic[1]), 1e-10)
  expect_equal(result$fstatistic[2:3], test.s$fstatistic[2:3])
  expect_lt(sum(abs(result$cov.unscaled - test.s$cov.unscaled)), 1e-10)
  expect_lt(sum(abs(result$correlation - test.s$correlation)), 1e-10)
})