tests/testthat/test-SPSP.R

test_that("Check SPSP_step() use data(HighDim)", {
  skip_if_not_installed("glmnet")
  
  library(glmnet)
  
  # Use the high dimensional dataset (data(HighDim)) to test SPSP+Lasso:
  data(HighDim)  
  
  x <- as.matrix(HighDim[,-1])
  y <- HighDim[,1]
  
  lasso_fit <- glmnet(x = x, y = y, alpha = 1, intercept = FALSE)
  
  # library(plotmo) # for plot_glmnet
  # plot_glmnet(lasso_fit, label=15, xvar ="lambda")
  # coef(lasso_fit, s = 0.5)
  
  test_lm_3 <- lm(Y ~ X1 + X2 + X3 + 0, data = HighDim)
  
  # When nonzero == 0, run the following to obtain intercept
  # test_lm_int <- glm.fit(y = y, x = rep(1, length(y)), intercept = TRUE, family = gaussian())$coefficients
  
  # When nonzero >= 1, run the following to obtain intercept
  # test_lm_int <- glm.fit(y = y, x = cbind(1, x[,1:3]), intercept = FALSE, family = gaussian())$coefficients
  test_lm_int <- glm(y ~ X1 + X2 + X3 + 1, data = HighDim, family = gaussian())$coefficients
  
  
  # summary(test_lm_3)
  
  # SPSP+Lasso method
  K <- dim(lasso_fit$beta)[2]
  LBETA <- as.matrix(lasso_fit$beta)
  
  r_spsp1 <- SPSP::SPSP_step(x = x, y = y, BETA = LBETA, init = 1, standardize = FALSE, intercept = FALSE)
  est_beta_1 <- r_spsp1$beta_SPSP
  # colnames(X)[which(est_beta_1 != 0)]
  head(est_beta_1)
  
  r_spsp5 <- SPSP::SPSP_step(x = x, y = y, BETA = LBETA, init = 5, standardize = FALSE)
  est_beta_5 <- r_spsp5$beta_SPSP
  # head(est_beta_5)
  
  r_spsp5_std <- SPSP::SPSP_step(x = x, y = y, BETA = LBETA, init = 5, standardize = T, intercept = T)
  est_beta_5_std <- r_spsp5_std$beta_SPSP
  # head(est_beta_5_std)
  # r_spsp5_std$intercept
  
  # Comparison
  expect_equal(coef(test_lm_3), est_beta_1[1:3])
  expect_equal(coef(test_lm_3), est_beta_5[1:3])
  # expect_equal(coef(test_lm_3), est_beta_5_std[1:3])
  
  expect_equal(test_lm_int[1], r_spsp5_std$intercept)
})

test_that("Check SPSP() with lasso.glmnet() use data(HighDim)", {
  skip_if_not_installed("glmnet")
  
  library(glmnet)
  
  # Use the high dimensional dataset (data(HighDim)) to test SPSP+Lasso:
  data(HighDim)  
  
  x <- as.matrix(HighDim[,-1])
  y <- HighDim[,1]
  
  xstd <- scale(HighDim[,-1], center = TRUE, scale = TRUE)
  
  HighDim2 <- data.frame(Y = y, xstd)
  
  test_lm_3 <- lm(Y ~ X1 + X2 + X3 + 0, data = HighDim)
  
  test_lm_std_3 <- lm(Y ~ X1 + X2 + X3 + 0, data = HighDim2)
  
  # When nonzero == 0, run the following to obtain intercept
  # test_lm_int <- glm.fit(y = y, x = rep(1, length(y)), intercept = TRUE, family = gaussian())$coefficients
  
  # When nonzero >= 1, run the following to obtain intercept
  # test_lm_int <- glm.fit(y = y, x = cbind(1, x[,1:3]), intercept = FALSE, family = gaussian())$coefficients
  test_lm_int <- glm(y ~ X1 + X2 + X3 + 1, data = HighDim, family = gaussian())$coefficients
  
  
  lasso_fit <- glmnet(x = x, y = y, alpha = 1, intercept = FALSE)
  # coef(lasso_fit)
  
  r_spsp1 <- SPSP::SPSP(x = x, y = y, family = "gaussian", fitfun.SP = lasso.glmnet,
                        init = 1, standardize = FALSE, intercept = FALSE)
  
  est_beta_1 <- r_spsp1$beta_SPSP
  # colnames(X)[which(est_beta_1 != 0)]
  # head(est_beta_1)
  
  r_spsp5 <- SPSP::SPSP(x = x, y = y, family = "gaussian", fitfun.SP = lasso.glmnet,
                        init = 5, standardize = FALSE, intercept = FALSE)
  est_beta_5 <- r_spsp5$beta_SPSP
  # head(est_beta_5)
  
  r_spsp5_std <- SPSP::SPSP(x = x, y = y, family = "gaussian", fitfun.SP = adalasso.glmnet,
                            init = 5, standardize = TRUE, intercept = FALSE)
  est_beta_5_std <- r_spsp5_std$beta_SPSP
  # head(est_beta_5_std)
  
  r_spsp5_std_int <- SPSP::SPSP(x = x, y = y, family = "gaussian", fitfun.SP = adalasso.glmnet,
                            init = 5, standardize = TRUE, intercept = TRUE)
  est_beta_5_std_int <- r_spsp5_std_int$beta_SPSP
  # head(est_beta_5_std_int)
  # r_spsp5_std_int$intercept
  
  # Check if coefficients are same as ols
  expect_equal(coef(test_lm_3), est_beta_1[1:3])
  expect_equal(coef(test_lm_3), est_beta_5[1:3])
  expect_equal(coef(test_lm_std_3), est_beta_5_std[1:3])
  expect_equal(coef(test_lm_std_3), est_beta_5_std_int[1:3])
  
  # Check if intercept is correct
  expect_true(is.na(r_spsp1$intercept))
  expect_true(is.na(r_spsp5$intercept))
  expect_true(is.na(r_spsp5_std$intercept))
  expect_true(!is.na(r_spsp5_std_int$intercept))
})

test_that("Check SPSP() with adalasso.glmnet() use data(HighDim)", {
  skip_if_not_installed("glmnet")
  library(glmnet)
  
  # Use the high dimensional dataset (data(HighDim)) to test SPSP+Lasso:
  data(HighDim)  
  
  x <- as.matrix(HighDim[,-1])
  y <- HighDim[,1]
  
  xstd <- scale(HighDim[,-1], center = TRUE, scale = TRUE)
  
  HighDim2 <- data.frame(Y = y, xstd)
  
  test_lm_3 <- lm(Y ~ X1 + X2 + X3 + 0, data = HighDim)
  
  test_lm_std_3 <- lm(Y ~ X1 + X2 + X3 + 0, data = HighDim2)
  
  # When nonzero == 0, run the following to obtain intercept
  # test_lm_int <- glm.fit(y = y, x = rep(1, length(y)), intercept = TRUE, family = gaussian())$coefficients
  
  # When nonzero >= 1, run the following to obtain intercept
  # test_lm_int <- glm.fit(y = y, x = cbind(1, x[,1:3]), intercept = FALSE, family = gaussian())$coefficients
  test_lm_int <- glm(y ~ X1 + X2 + X3 + 1, data = HighDim, family = gaussian())$coefficients
  
  r_spsp1 <- SPSP::SPSP(x = x, y = y, family = "gaussian", fitfun.SP = adalasso.glmnet,
                        init = 1, standardize = FALSE, intercept = FALSE)
  
  est_beta_1 <- r_spsp1$beta_SPSP
  # colnames(X)[which(est_beta_1 != 0)]
  # head(est_beta_1)
  
  r_spsp5 <- SPSP::SPSP(x = x, y = y, family = "gaussian", fitfun.SP = adalasso.glmnet,
                        init = 5, standardize = FALSE, intercept = FALSE)
  est_beta_5 <- r_spsp5$beta_SPSP
  # head(est_beta_5)
  
  r_spsp5_std <- SPSP::SPSP(x = x, y = y, family = "gaussian", fitfun.SP = adalasso.glmnet,
                            init = 5, standardize = TRUE, intercept = FALSE)
  est_beta_5_std <- r_spsp5_std$beta_SPSP
  # head(est_beta_5_std)
  
  r_spsp5_std_int <- SPSP::SPSP(x = x, y = y, family = "gaussian", fitfun.SP = adalasso.glmnet,
                                init = 5, standardize = TRUE, intercept = TRUE)
  est_beta_5_std_int <- r_spsp5_std_int$beta_SPSP
  # head(est_beta_5_std_int)
  # r_spsp5_std_int$intercept
  
  # Check if coefficients are same as ols
  expect_equal(coef(test_lm_3), est_beta_1[1:3])
  expect_equal(coef(test_lm_3), est_beta_5[1:3])
  expect_equal(coef(test_lm_std_3), est_beta_5_std[1:3])
  expect_equal(coef(test_lm_std_3), est_beta_5_std_int[1:3])
  
  # Check if intercept is correct
  expect_true(is.na(r_spsp1$intercept))
  expect_true(is.na(r_spsp5$intercept))
  expect_true(is.na(r_spsp5_std$intercept))
  expect_true(!is.na(r_spsp5_std_int$intercept))
})

test_that("Check SPSP() with adalassoCV.glmnet() use data(HighDim)", {
  skip_if_not_installed("glmnet")
  
  library(glmnet)
  
  # Use the high dimensional dataset (data(HighDim)) to test SPSP+Lasso:
  data(HighDim)  
  
  x <- as.matrix(HighDim[,-1])
  y <- HighDim[,1]
  
  xstd <- scale(HighDim[,-1], center = TRUE, scale = TRUE)
  
  HighDim2 <- data.frame(Y = y, xstd)
  
  test_lm_3 <- lm(Y ~ X1 + X2 + X3 + 0, data = HighDim)
  
  test_lm_std_3 <- lm(Y ~ X1 + X2 + X3 + 0, data = HighDim2)
  
  # When nonzero == 0, run the following to obtain intercept
  # test_lm_int <- glm.fit(y = y, x = rep(1, length(y)), intercept = TRUE, family = gaussian())$coefficients
  
  # When nonzero >= 1, run the following to obtain intercept
  # test_lm_int <- glm.fit(y = y, x = cbind(1, x[,1:3]), intercept = FALSE, family = gaussian())$coefficients
  test_lm_int <- glm(y ~ X1 + X2 + X3 + 1, data = HighDim, family = gaussian())$coefficients
  
  r_spsp1 <- SPSP::SPSP(x = x, y = y, family = "gaussian", fitfun.SP = adalassoCV.glmnet,
                        init = 1, standardize = FALSE, intercept = FALSE)
  
  est_beta_1 <- r_spsp1$beta_SPSP
  # colnames(X)[which(est_beta_1 != 0)]
  # head(est_beta_1)
  
  r_spsp5 <- SPSP::SPSP(x = x, y = y, family = "gaussian", fitfun.SP = adalassoCV.glmnet,
                        init = 5, standardize = FALSE, intercept = FALSE)
  est_beta_5 <- r_spsp5$beta_SPSP
  # head(est_beta_5)
  
  r_spsp5_std <- SPSP::SPSP(x = x, y = y, family = "gaussian", fitfun.SP = adalassoCV.glmnet,
                            init = 5, standardize = TRUE, intercept = FALSE)
  est_beta_5_std <- r_spsp5_std$beta_SPSP
  # head(est_beta_5_std)
  
  r_spsp5_std_int <- SPSP::SPSP(x = x, y = y, family = "gaussian", fitfun.SP = adalassoCV.glmnet,
                                init = 5, standardize = TRUE, intercept = TRUE)
  est_beta_5_std_int <- r_spsp5_std_int$beta_SPSP
  # head(est_beta_5_std_int)
  # r_spsp5_std_int$intercept
  
  # Check if coefficients are same as ols
  expect_equal(coef(test_lm_3), est_beta_1[1:3])
  expect_equal(coef(test_lm_3), est_beta_5[1:3])
  expect_equal(coef(test_lm_std_3), est_beta_5_std[1:3])
  expect_equal(coef(test_lm_std_3), est_beta_5_std_int[1:3])
  
  # Check if intercept is correct
  expect_true(is.na(r_spsp1$intercept))
  expect_true(is.na(r_spsp5$intercept))
  expect_true(is.na(r_spsp5_std$intercept))
  expect_true(!is.na(r_spsp5_std_int$intercept))
})

test_that("Check SPSP() with SCAD.ncvreg() use data(HighDim)", {
  skip_if_not_installed("glmnet")
  
  library(ncvreg)
  
  # Use the high dimensional dataset (data(HighDim)) to test SPSP+Lasso:
  data(HighDim)  
  
  x <- as.matrix(HighDim[,-1])
  y <- HighDim[,1]
  
  xstd <- scale(HighDim[,-1], center = TRUE, scale = TRUE)
  
  HighDim2 <- data.frame(Y = y, xstd)
  
  test_lm_3 <- lm(Y ~ X1 + X2 + X3 + 0, data = HighDim)
  
  test_lm_std_3 <- lm(Y ~ X1 + X2 + X3 + 0, data = HighDim2)
  
  # When nonzero == 0, run the following to obtain intercept
  # test_lm_int <- glm.fit(y = y, x = rep(1, length(y)), intercept = TRUE, family = gaussian())$coefficients
  
  # When nonzero >= 1, run the following to obtain intercept
  # test_lm_int <- glm.fit(y = y, x = cbind(1, x[,1:3]), intercept = FALSE, family = gaussian())$coefficients
  test_lm_int <- glm(y ~ X1 + X2 + X3 + 1, data = HighDim, family = gaussian())$coefficients

  test <- SCAD.ncvreg(x = x, 
                          y = y, 
                          family = "gaussian",
                          standardize = FALSE, 
                          intercept = FALSE)
  # test$beta
  
  r_spsp1 <- SPSP::SPSP(x = x, y = y, family = "gaussian", fitfun.SP = SCAD.ncvreg,
                        init = 1, standardize = FALSE, intercept = FALSE)
  
  est_beta_1 <- r_spsp1$beta_SPSP
  # colnames(X)[which(est_beta_1 != 0)]
  # head(est_beta_1)
  
  r_spsp5 <- SPSP::SPSP(x = x, y = y, family = "gaussian", fitfun.SP = SCAD.ncvreg,
                        init = 5, standardize = FALSE, intercept = FALSE)
  est_beta_5 <- r_spsp5$beta_SPSP
  # head(est_beta_5)
  
  r_spsp5_std <- SPSP::SPSP(x = x, y = y, family = "gaussian", fitfun.SP = SCAD.ncvreg,
                            init = 5, standardize = TRUE, intercept = FALSE)
  est_beta_5_std <- r_spsp5_std$beta_SPSP
  # head(est_beta_5_std)
  
  r_spsp5_std_int <- SPSP::SPSP(x = x, y = y, family = "gaussian", fitfun.SP = SCAD.ncvreg,
                                init = 5, standardize = TRUE, intercept = TRUE)
  est_beta_5_std_int <- r_spsp5_std_int$beta_SPSP
  # head(est_beta_5_std_int)
  # r_spsp5_std_int$intercept
  
  # Check if coefficients are same as ols
  expect_equal(coef(test_lm_3), est_beta_1[1:3])
  expect_equal(coef(test_lm_3), est_beta_5[1:3])
  expect_equal(coef(test_lm_std_3), est_beta_5_std[1:3])
  expect_equal(coef(test_lm_std_3), est_beta_5_std_int[1:3])
  
  # Check if intercept is correct
  expect_true(is.na(r_spsp1$intercept))
  expect_true(is.na(r_spsp5$intercept))
  expect_true(is.na(r_spsp5_std$intercept))
  expect_true(!is.na(r_spsp5_std_int$intercept))
})

test_that("Check SPSP() with MCP.ncvreg() use data(HighDim)", {
  skip_if_not_installed("glmnet")
  
  library(ncvreg)
  
  # Use the high dimensional dataset (data(HighDim)) to test SPSP+Lasso:
  data(HighDim)  
  
  x <- as.matrix(HighDim[,-1])
  y <- HighDim[,1]
  
  xstd <- scale(HighDim[,-1], center = TRUE, scale = TRUE)
  
  HighDim2 <- data.frame(Y = y, xstd)
  
  test_lm_3 <- lm(Y ~ X1 + X2 + X3 + 0, data = HighDim)
  
  test_lm_std_3 <- lm(Y ~ X1 + X2 + X3 + 0, data = HighDim2)
  
  # When nonzero == 0, run the following to obtain intercept
  # test_lm_int <- glm.fit(y = y, x = rep(1, length(y)), intercept = TRUE, family = gaussian())$coefficients
  
  # When nonzero >= 1, run the following to obtain intercept
  # test_lm_int <- glm.fit(y = y, x = cbind(1, x[,1:3]), intercept = FALSE, family = gaussian())$coefficients
  test_lm_int <- glm(y ~ X1 + X2 + X3 + 1, data = HighDim, family = gaussian())$coefficients
  
  test <- MCP.ncvreg(x = x, 
                      y = y, 
                      family = "gaussian",
                      standardize = FALSE, 
                      intercept = FALSE)
  # test$beta
  
  r_spsp1 <- SPSP::SPSP(x = x, y = y, family = "gaussian", fitfun.SP = MCP.ncvreg,
                        init = 1, standardize = FALSE, intercept = FALSE)
  
  est_beta_1 <- r_spsp1$beta_SPSP
  # colnames(X)[which(est_beta_1 != 0)]
  # head(est_beta_1)
  
  r_spsp5 <- SPSP::SPSP(x = x, y = y, family = "gaussian", fitfun.SP = MCP.ncvreg,
                        init = 5, standardize = FALSE, intercept = FALSE)
  est_beta_5 <- r_spsp5$beta_SPSP
  # head(est_beta_5)
  
  r_spsp5_std <- SPSP::SPSP(x = x, y = y, family = "gaussian", fitfun.SP = MCP.ncvreg,
                            init = 5, standardize = TRUE, intercept = FALSE)
  est_beta_5_std <- r_spsp5_std$beta_SPSP
  # head(est_beta_5_std)
  
  r_spsp5_std_int <- SPSP::SPSP(x = x, y = y, family = "gaussian", fitfun.SP = MCP.ncvreg,
                                init = 5, standardize = TRUE, intercept = TRUE)
  est_beta_5_std_int <- r_spsp5_std_int$beta_SPSP
  # head(est_beta_5_std_int)
  # r_spsp5_std_int$intercept
  
  # Check if coefficients are same as ols
  expect_equal(coef(test_lm_3), est_beta_1[1:3])
  expect_equal(coef(test_lm_3), est_beta_5[1:3])
  expect_equal(coef(test_lm_std_3), est_beta_5_std[1:3])
  expect_equal(coef(test_lm_std_3), est_beta_5_std_int[1:3])
  
  # Check if intercept is correct
  expect_true(is.na(r_spsp1$intercept))
  expect_true(is.na(r_spsp5$intercept))
  expect_true(is.na(r_spsp5_std$intercept))
  expect_true(!is.na(r_spsp5_std_int$intercept))
})

test_that("Check SPSP() with use data(Boston)", {
  skip_if_not_installed("MASS")
  skip_if_not_installed("glmnet")
  
  library(glmnet)
  library(MASS)
  # Boston Housing data from http://archive.ics.uci.edu/ml/datasets/Housing
  data(Boston, package="MASS")
  colnames(Boston) 
  summary(Boston)
  
  Boston2 <- subset(Boston, crim<=3.2)
  # Boston2 <- Boston
  # dim(Boston2)
  x <- as.matrix(Boston2[,-14]); y <- Boston2[,14] 
  summary(x)
  x[,"crim"] <- log(x[,"crim"])
  x[,"tax"] <- log(x[,"tax"])
  x[,"lstat"] <- log(x[,"lstat"])
  x[,"dis"] <- log(x[,"dis"])
  # x[,"rad"] <- log(x[,"rad"])
  y <- scale(y)
  x[,"age"] <- log(x[,"age"])
  
  for (i in 1:(ncol(x))){
    x[,i] <- scale(x[,i])
  }
  # summary(x)
  
  # round(cor(x), 2)
  # round(cor(Boston[,-14]), 2)
  # summary(y)
  
  expect_error(SPSP::SPSP(x = x, y = y, family = "gaussian", fitfun.SP = lasso.glmnet,
                          init = 5, standardize = T, intercept = FALSE),
               NA)
  
  expect_error(SPSP::SPSP(x = x, y = y, family = "gaussian", fitfun.SP = adalasso.glmnet,
                          init = 5, standardize = T, intercept = FALSE),
               NA)
  
  expect_error(SPSP::SPSP(x = x, y = y, family = "gaussian", fitfun.SP = adalassoCV.glmnet,
                          init = 5, standardize = T, intercept = FALSE),
               NA)
  
  expect_error(SPSP::SPSP(x = x, y = y, family = "gaussian", fitfun.SP = ridge.glmnet,
                          init = 5, standardize = T, intercept = FALSE),
               NA)
  
  err <- expect_error(SPSP::SPSP(x = x, y = y, family = "gaussian", fitfun.SP = lasso.lars,
                          init = 5, standardize = T, intercept = FALSE))
  expect_equal(err$message, "The function lasso.lars() is currently under development.")
  
})

test_that("Check penalty.factor argument in glmnet() for SPSP()", {
  skip_if_not_installed("MASS")
  skip_if_not_installed("glmnet")
  
  library(glmnet)
  library(MASS)
  # Boston Housing data from http://archive.ics.uci.edu/ml/datasets/Housing
  data(Boston, package="MASS")
  colnames(Boston) 
  summary(Boston)
  
  Boston2 <- subset(Boston, crim<=3.2)
  # Boston2 <- Boston
  # dim(Boston2)
  x <- as.matrix(Boston2[,-14]); medv <- Boston2[,14] 
  summary(x)
  x[,"crim"] <- log(x[,"crim"])
  x[,"tax"] <- log(x[,"tax"])
  x[,"lstat"] <- log(x[,"lstat"])
  x[,"dis"] <- log(x[,"dis"])
  # x[,"rad"] <- log(x[,"rad"])
  medv <- scale(medv)
  x[,"age"] <- log(x[,"age"])
  
  for (i in 1:(ncol(x))){
    x[,i] <- scale(x[,i])
  }
  # summary(x)
  
  # round(cor(x), 2)
  # round(cor(Boston[,-14]), 2)
  # summary(y)
  
  expect_error(SPSP::SPSP(x = x, y = medv, family = "gaussian", fitfun.SP = lasso.glmnet,
                          init = 5, standardize = T, intercept = FALSE),
               NA)
  
  test1 <- SPSP::SPSP(x = x, y = medv, family = "gaussian", fitfun.SP = lasso.glmnet,
                      args.fitfun.SP = list(penalty.factor = c(rep(1, 5), 0, rep(1, 6), 0)),
                      init = 1, standardize = T, intercept = FALSE)
  # test1$nonzero

  fit1 <- attr(test1, "glmnet.fit")
  # fit1$beta
  
  # library(plotmo) 
  
  # plot_glmnet(fit1, label=15, xvar ="lambda")
  
  expect_error(SPSP::SPSP(x = x, y = medv, family = "gaussian", fitfun.SP = adalassoCV.glmnet,
                          init = 5, standardize = T, intercept = FALSE),
               NA)
  
  test2 <- SPSP::SPSP(x = x, y = medv, family = "gaussian", fitfun.SP = adalassoCV.glmnet,
                      args.fitfun.SP = list(penalty.factor = c(rep(1, 5), 0, rep(1, 6), 0)),
                      init = 1, standardize = T, intercept = FALSE)
  # test2$nonzero
  fit2 <- attr(test2, "glmnet.fit")
  
  # plot_glmnet(fit2, label=15, xvar ="lambda")
  
})

Try the SPSP package in your browser

Any scripts or data that you put into this service are public.

SPSP documentation built on Oct. 23, 2023, 1:07 a.m.