Nothing
context("test-pls-cpp-consistency")
test_that("PLS (NIPALS) implementations produce consistent results with scale = TRUE", {
skip_on_cran()
skip_if_not_installed("prospectr")
library(prospectr)
data(NIRsoil, package = "prospectr")
# Preprocessing
sg_det <- savitzkyGolay(
detrend(NIRsoil$spc, wav = as.numeric(colnames(NIRsoil$spc))),
m = 1, p = 1, w = 7
)
NIRsoil$spc_pr <- sg_det
# Split data
train_idx <- NIRsoil$train == 1 & !is.na(NIRsoil$Ciso)
test_idx <- NIRsoil$train == 0 & !is.na(NIRsoil$Ciso)
train_x <- NIRsoil$spc_pr[train_idx, ]
train_y <- NIRsoil$Ciso[train_idx]
test_x <- NIRsoil$spc_pr[test_idx, ]
ncomp_test <- 40
tol <- 1e-8
tol2 <- 1e-5
# Fit models
pls_basics <- resemble:::opls_get_basics(
X = train_x,
Y = as.matrix(train_y),
ncomp = ncomp_test,
scale = TRUE,
maxiter = 100,
tol = 1e-06,
algorithm = "pls",
xls_min_w = 3,
xls_max_w = 15
)
pls_opls <- resemble:::opls(
X = train_x,
Y = as.matrix(train_y),
ncomp = ncomp_test,
scale = TRUE,
maxiter = 1000,
tol = 1e-06,
algorithm = "pls",
xls_min_w = 3,
xls_max_w = 15
)
pls_projection <- resemble:::opls_for_projection(
X = train_x,
Y = as.matrix(train_y),
ncomp = ncomp_test,
scale = TRUE,
maxiter = 1000,
tol = 1e-06,
pcSelmethod = "manual",
pcSelvalue = ncomp_test,
algorithm = "pls",
xls_min_w = 3,
xls_max_w = 15
)
pls_gesearch <- resemble:::opls_gesearch(
Xr = train_x,
Yr = as.matrix(train_y),
Xu = test_x,
ncomp = ncomp_test,
scale = TRUE,
response = TRUE,
reconstruction = TRUE,
similarity = TRUE,
fresponse = TRUE,
algorithm = "pls"
)
# Predictions
pls_basics_predict <- resemble:::predict_opls(
bo = pls_basics$bo,
b = pls_basics$coefficients,
ncomp = ncomp_test,
newdata = test_x,
scale = TRUE,
Xscale = pls_basics$transf$Xscale
)
pls_opls_predict <- resemble:::predict_opls(
bo = pls_opls$bo,
b = pls_opls$coefficients,
ncomp = ncomp_test,
newdata = test_x,
scale = TRUE,
Xscale = pls_opls$transf$Xscale
)
# Reconstruction error
pls_opls_rec <- resemble:::reconstruction_error(
x = test_x,
projection_mat = pls_opls$projection_mat,
xloadings = pls_opls$X_loadings,
scale = TRUE,
Xcenter = pls_opls$transf$Xcenter,
Xscale = pls_opls$transf$Xscale,
scale_back = TRUE
)
# Tests
expect_lt(
sum(abs(pls_projection$weights - pls_basics$weights)),
tol,
label = "PLS scale=TRUE: opls_for_projection vs opls_get_basics weights"
)
expect_lt(
sum(abs(pls_basics$weights - pls_opls$weights)),
tol,
label = "PLS scale=TRUE: opls_get_basics vs opls weights"
)
expect_lt(
sum(abs(pls_basics_predict - pls_opls_predict)),
tol,
label = "PLS scale=TRUE: predict_opls consistency"
)
expect_lt(
sum(abs(pls_projection$scores - pls_opls$scores)),
tol,
label = "PLS scale=TRUE: opls_for_projection vs opls scores"
)
expect_lt(
sum(abs(pls_gesearch$pred_response - pls_opls_predict[, ncomp_test])),
tol,
label = "PLS scale=TRUE: opls_gesearch vs opls predictions"
)
expect_lt(
abs(drop(pls_gesearch$rmse_reconstruction) - drop(pls_opls_rec)),
tol,
label = "PLS scale=TRUE: opls_gesearch vs reconstruction_error"
)
})
test_that("PLS (NIPALS) implementations produce consistent results with scale = FALSE", {
skip_on_cran()
skip_if_not_installed("prospectr")
library(prospectr)
data(NIRsoil, package = "prospectr")
# Preprocessing
sg_det <- savitzkyGolay(
detrend(NIRsoil$spc, wav = as.numeric(colnames(NIRsoil$spc))),
m = 1, p = 1, w = 7
)
NIRsoil$spc_pr <- sg_det
# Split data
train_idx <- NIRsoil$train == 1 & !is.na(NIRsoil$Ciso)
test_idx <- NIRsoil$train == 0 & !is.na(NIRsoil$Ciso)
train_x <- NIRsoil$spc_pr[train_idx, ]
train_y <- NIRsoil$Ciso[train_idx]
test_x <- NIRsoil$spc_pr[test_idx, ]
ncomp_test <- 40
tol <- 1e-8
# Fit models
pls_basics <- resemble:::opls_get_basics(
X = train_x,
Y = as.matrix(train_y),
ncomp = ncomp_test,
scale = FALSE,
maxiter = 100,
tol = 1e-06,
algorithm = "pls",
xls_min_w = 3,
xls_max_w = 15
)
pls_opls <- resemble:::opls(
X = train_x,
Y = as.matrix(train_y),
ncomp = ncomp_test,
scale = FALSE,
maxiter = 1000,
tol = 1e-06,
algorithm = "pls",
xls_min_w = 3,
xls_max_w = 15
)
pls_projection <- resemble:::opls_for_projection(
X = train_x,
Y = as.matrix(train_y),
ncomp = ncomp_test,
scale = FALSE,
maxiter = 1000,
tol = 1e-06,
pcSelmethod = "manual",
pcSelvalue = ncomp_test,
algorithm = "pls",
xls_min_w = 3,
xls_max_w = 15
)
pls_gesearch <- resemble:::opls_gesearch(
Xr = train_x,
Yr = as.matrix(train_y),
Xu = test_x,
ncomp = ncomp_test,
scale = FALSE,
response = TRUE,
reconstruction = TRUE,
similarity = TRUE,
fresponse = TRUE,
algorithm = "pls"
)
# Predictions
pls_basics_predict <- resemble:::predict_opls(
bo = pls_basics$bo,
b = pls_basics$coefficients,
ncomp = ncomp_test,
newdata = test_x,
scale = FALSE,
Xscale = pls_basics$transf$Xscale
)
pls_opls_predict <- resemble:::predict_opls(
bo = pls_opls$bo,
b = pls_opls$coefficients,
ncomp = ncomp_test,
newdata = test_x,
scale = FALSE,
Xscale = pls_opls$transf$Xscale
)
# Reconstruction error
pls_opls_rec <- resemble:::reconstruction_error(
x = test_x,
projection_mat = pls_opls$projection_mat,
xloadings = pls_opls$X_loadings,
scale = FALSE,
Xcenter = pls_opls$transf$Xcenter,
Xscale = pls_opls$transf$Xscale,
scale_back = TRUE
)
# Tests
expect_lt(
sum(abs(pls_projection$weights - pls_basics$weights)),
tol,
label = "PLS scale=FALSE: opls_for_projection vs opls_get_basics weights"
)
expect_lt(
sum(abs(pls_basics$weights - pls_opls$weights)),
tol,
label = "PLS scale=FALSE: opls_get_basics vs opls weights"
)
expect_lt(
sum(abs(pls_basics_predict - pls_opls_predict)),
tol,
label = "PLS scale=FALSE: predict_opls consistency"
)
expect_lt(
sum(abs(pls_projection$scores - pls_opls$scores)),
tol,
label = "PLS scale=FALSE: opls_for_projection vs opls scores"
)
expect_lt(
sum(abs(pls_gesearch$pred_response - pls_opls_predict[, ncomp_test])),
tol,
label = "PLS scale=FALSE: opls_gesearch vs opls predictions"
)
expect_lt(
abs(drop(pls_gesearch$rmse_reconstruction) - drop(pls_opls_rec)),
tol,
label = "PLS scale=FALSE: opls_gesearch vs reconstruction_error"
)
# Xscale should be all ones when scale = FALSE
expect_true(
all(pls_basics$transf$Xscale == 1),
label = "PLS scale=FALSE: Xscale should be all ones (opls_get_basics)"
)
expect_true(
all(pls_opls$transf$Xscale == 1),
label = "PLS scale=FALSE: Xscale should be all ones (opls)"
)
})
test_that("SIMPLS implementations produce consistent results with scale = TRUE", {
skip_on_cran()
skip_if_not_installed("prospectr")
library(prospectr)
data(NIRsoil, package = "prospectr")
# Preprocessing
sg_det <- savitzkyGolay(
detrend(NIRsoil$spc, wav = as.numeric(colnames(NIRsoil$spc))),
m = 1, p = 1, w = 7
)
NIRsoil$spc_pr <- sg_det
# Split data
train_idx <- NIRsoil$train == 1 & !is.na(NIRsoil$Ciso)
test_idx <- NIRsoil$train == 0 & !is.na(NIRsoil$Ciso)
train_x <- NIRsoil$spc_pr[train_idx, ]
train_y <- NIRsoil$Ciso[train_idx]
test_x <- NIRsoil$spc_pr[test_idx, ]
ncomp_test <- 40
tol <- 1e-8
tol2 <- 1e-5
# Fit models
simpls_basics <- resemble:::opls_get_basics(
X = train_x,
Y = as.matrix(train_y),
ncomp = ncomp_test,
scale = TRUE,
maxiter = 100,
tol = 1e-06,
algorithm = "simpls",
xls_min_w = 3,
xls_max_w = 15
)
simpls_opls <- resemble:::opls(
X = train_x,
Y = as.matrix(train_y),
ncomp = ncomp_test,
scale = TRUE,
maxiter = 1000,
tol = 1e-06,
algorithm = "simpls",
xls_min_w = 3,
xls_max_w = 15
)
simpls_projection <- resemble:::opls_for_projection(
X = train_x,
Y = as.matrix(train_y),
ncomp = ncomp_test,
scale = TRUE,
maxiter = 1000,
tol = 1e-06,
pcSelmethod = "manual",
pcSelvalue = ncomp_test,
algorithm = "simpls",
xls_min_w = 3,
xls_max_w = 15
)
simpls_gesearch <- resemble:::opls_gesearch(
Xr = train_x,
Yr = as.matrix(train_y),
Xu = test_x,
ncomp = ncomp_test,
scale = TRUE,
response = TRUE,
reconstruction = TRUE,
similarity = TRUE,
fresponse = TRUE,
algorithm = "simpls"
)
# Predictions
simpls_basics_predict <- resemble:::predict_opls(
bo = simpls_basics$bo,
b = simpls_basics$coefficients,
ncomp = ncomp_test,
newdata = test_x,
scale = TRUE,
Xscale = simpls_basics$transf$Xscale
)
simpls_opls_predict <- resemble:::predict_opls(
bo = simpls_opls$bo,
b = simpls_opls$coefficients,
ncomp = ncomp_test,
newdata = test_x,
scale = TRUE,
Xscale = simpls_opls$transf$Xscale
)
# Reconstruction error
simpls_opls_rec <- resemble:::reconstruction_error(
x = test_x,
projection_mat = simpls_opls$projection_mat,
xloadings = simpls_opls$X_loadings,
scale = TRUE,
Xcenter = simpls_opls$transf$Xcenter,
Xscale = simpls_opls$transf$Xscale,
scale_back = TRUE
)
# Tests
expect_lt(
sum(abs(simpls_projection$weights - simpls_basics$weights)),
tol,
label = "SIMPLS scale=TRUE: opls_for_projection vs opls_get_basics weights"
)
expect_lt(
sum(abs(simpls_basics$weights - simpls_opls$weights)),
tol,
label = "SIMPLS scale=TRUE: opls_get_basics vs opls weights"
)
expect_lt(
sum(abs(simpls_basics_predict - simpls_opls_predict)),
tol2,
label = "SIMPLS scale=TRUE: predict_opls consistency"
)
expect_lt(
sum(abs(simpls_projection$scores - simpls_opls$scores)),
tol,
label = "SIMPLS scale=TRUE: opls_for_projection vs opls scores"
)
expect_lt(
sum(abs(simpls_gesearch$pred_response - simpls_opls_predict[, ncomp_test])),
tol2,
label = "SIMPLS scale=TRUE: opls_gesearch vs opls predictions"
)
expect_lt(
abs(drop(simpls_gesearch$rmse_reconstruction) - drop(simpls_opls_rec)),
tol,
label = "SIMPLS scale=TRUE: opls_gesearch vs reconstruction_error"
)
})
test_that("SIMPLS implementations produce consistent results with scale = FALSE", {
skip_on_cran()
skip_if_not_installed("prospectr")
library(prospectr)
data(NIRsoil, package = "prospectr")
# Preprocessing
sg_det <- savitzkyGolay(
detrend(NIRsoil$spc, wav = as.numeric(colnames(NIRsoil$spc))),
m = 1, p = 1, w = 7
)
NIRsoil$spc_pr <- sg_det
# Split data
train_idx <- NIRsoil$train == 1 & !is.na(NIRsoil$Ciso)
test_idx <- NIRsoil$train == 0 & !is.na(NIRsoil$Ciso)
train_x <- NIRsoil$spc_pr[train_idx, ]
train_y <- NIRsoil$Ciso[train_idx]
test_x <- NIRsoil$spc_pr[test_idx, ]
ncomp_test <- 40
tol <- 1e-8
tol2 <- 1e-5
# Fit models
simpls_basics <- resemble:::opls_get_basics(
X = train_x,
Y = as.matrix(train_y),
ncomp = ncomp_test,
scale = FALSE,
maxiter = 100,
tol = 1e-06,
algorithm = "simpls",
xls_min_w = 3,
xls_max_w = 15
)
simpls_opls <- resemble:::opls(
X = train_x,
Y = as.matrix(train_y),
ncomp = ncomp_test,
scale = FALSE,
maxiter = 1000,
tol = 1e-06,
algorithm = "simpls",
xls_min_w = 3,
xls_max_w = 15
)
simpls_projection <- resemble:::opls_for_projection(
X = train_x,
Y = as.matrix(train_y),
ncomp = ncomp_test,
scale = FALSE,
maxiter = 1000,
tol = 1e-06,
pcSelmethod = "manual",
pcSelvalue = ncomp_test,
algorithm = "simpls",
xls_min_w = 3,
xls_max_w = 15
)
simpls_gesearch <- resemble:::opls_gesearch(
Xr = train_x,
Yr = as.matrix(train_y),
Xu = test_x,
ncomp = ncomp_test,
scale = FALSE,
response = TRUE,
reconstruction = TRUE,
similarity = TRUE,
fresponse = TRUE,
algorithm = "simpls"
)
# Predictions
simpls_basics_predict <- resemble:::predict_opls(
bo = simpls_basics$bo,
b = simpls_basics$coefficients,
ncomp = ncomp_test,
newdata = test_x,
scale = FALSE,
Xscale = simpls_basics$transf$Xscale
)
simpls_opls_predict <- resemble:::predict_opls(
bo = simpls_opls$bo,
b = simpls_opls$coefficients,
ncomp = ncomp_test,
newdata = test_x,
scale = FALSE,
Xscale = simpls_opls$transf$Xscale
)
# Reconstruction error
simpls_opls_rec <- resemble:::reconstruction_error(
x = test_x,
projection_mat = simpls_opls$projection_mat,
xloadings = simpls_opls$X_loadings,
scale = FALSE,
Xcenter = simpls_opls$transf$Xcenter,
Xscale = simpls_opls$transf$Xscale,
scale_back = TRUE
)
# Tests
expect_lt(
sum(abs(simpls_projection$weights - simpls_basics$weights)),
tol,
label = "SIMPLS scale=FALSE: opls_for_projection vs opls_get_basics weights"
)
expect_lt(
sum(abs(simpls_basics$weights - simpls_opls$weights)),
tol,
label = "SIMPLS scale=FALSE: opls_get_basics vs opls weights"
)
expect_lt(
sum(abs(simpls_basics_predict - simpls_opls_predict)),
tol2,
label = "SIMPLS scale=FALSE: predict_opls consistency"
)
expect_lt(
sum(abs(simpls_projection$scores - simpls_opls$scores)),
tol,
label = "SIMPLS scale=FALSE: opls_for_projection vs opls scores"
)
expect_lt(
sum(abs(simpls_gesearch$pred_response - simpls_opls_predict[, ncomp_test])),
tol2,
label = "SIMPLS scale=FALSE: opls_gesearch vs opls predictions"
)
expect_lt(
abs(drop(simpls_gesearch$rmse_reconstruction) - drop(simpls_opls_rec)),
tol,
label = "SIMPLS scale=FALSE: opls_gesearch vs reconstruction_error"
)
# Xscale should be all ones when scale = FALSE
expect_true(
all(simpls_basics$transf$Xscale == 1),
label = "SIMPLS scale=FALSE: Xscale should be all ones (opls_get_basics)"
)
expect_true(
all(simpls_opls$transf$Xscale == 1),
label = "SIMPLS scale=FALSE: Xscale should be all ones (opls)"
)
})
test_that("mpls implementations produce consistent results with scale = TRUE", {
skip_on_cran()
skip_if_not_installed("prospectr")
library(prospectr)
data(NIRsoil, package = "prospectr")
# Preprocessing
sg_det <- savitzkyGolay(
detrend(NIRsoil$spc, wav = as.numeric(colnames(NIRsoil$spc))),
m = 1, p = 1, w = 7
)
NIRsoil$spc_pr <- sg_det
# Split data
train_idx <- NIRsoil$train == 1 & !is.na(NIRsoil$Ciso)
test_idx <- NIRsoil$train == 0 & !is.na(NIRsoil$Ciso)
train_x <- NIRsoil$spc_pr[train_idx, ]
train_y <- NIRsoil$Ciso[train_idx]
test_x <- NIRsoil$spc_pr[test_idx, ]
ncomp_test <- 40
tol <- 1e-8
# Fit models
mpls_basics <- resemble:::opls_get_basics(
X = train_x,
Y = as.matrix(train_y),
ncomp = ncomp_test,
scale = TRUE,
maxiter = 100,
tol = 1e-06,
algorithm = "mpls",
xls_min_w = 3,
xls_max_w = 15
)
mpls_opls <- resemble:::opls(
X = train_x,
Y = as.matrix(train_y),
ncomp = ncomp_test,
scale = TRUE,
maxiter = 1000,
tol = 1e-06,
algorithm = "mpls",
xls_min_w = 3,
xls_max_w = 15
)
mpls_projection <- resemble:::opls_for_projection(
X = train_x,
Y = as.matrix(train_y),
ncomp = ncomp_test,
scale = TRUE,
maxiter = 1000,
tol = 1e-06,
pcSelmethod = "manual",
pcSelvalue = ncomp_test,
algorithm = "mpls",
xls_min_w = 3,
xls_max_w = 15
)
mpls_gesearch <- resemble:::opls_gesearch(
Xr = train_x,
Yr = as.matrix(train_y),
Xu = test_x,
ncomp = ncomp_test,
scale = TRUE,
response = TRUE,
reconstruction = TRUE,
similarity = TRUE,
fresponse = TRUE,
algorithm = "mpls"
)
# Predictions
mpls_basics_predict <- resemble:::predict_opls(
bo = mpls_basics$bo,
b = mpls_basics$coefficients,
ncomp = ncomp_test,
newdata = test_x,
scale = TRUE,
Xscale = mpls_basics$transf$Xscale
)
mpls_opls_predict <- resemble:::predict_opls(
bo = mpls_opls$bo,
b = mpls_opls$coefficients,
ncomp = ncomp_test,
newdata = test_x,
scale = TRUE,
Xscale = mpls_opls$transf$Xscale
)
# Reconstruction error
mpls_opls_rec <- resemble:::reconstruction_error(
x = test_x,
projection_mat = mpls_opls$projection_mat,
xloadings = mpls_opls$X_loadings,
scale = TRUE,
Xcenter = mpls_opls$transf$Xcenter,
Xscale = mpls_opls$transf$Xscale,
scale_back = TRUE
)
# Tests
expect_lt(
sum(abs(mpls_projection$weights - mpls_basics$weights)),
tol,
label = "mpls scale=TRUE: opls_for_projection vs opls_get_basics weights"
)
expect_lt(
sum(abs(mpls_basics$weights - mpls_opls$weights)),
tol,
label = "mpls scale=TRUE: opls_get_basics vs opls weights"
)
expect_lt(
sum(abs(mpls_basics_predict - mpls_opls_predict)),
tol,
label = "mpls scale=TRUE: predict_opls consistency"
)
expect_lt(
sum(abs(mpls_projection$scores - mpls_opls$scores)),
tol,
label = "mpls scale=TRUE: opls_for_projection vs opls scores"
)
expect_lt(
sum(abs(mpls_gesearch$pred_response - mpls_opls_predict[, ncomp_test])),
tol,
label = "mpls scale=TRUE: opls_gesearch vs opls predictions"
)
expect_lt(
abs(drop(mpls_gesearch$rmse_reconstruction) - drop(mpls_opls_rec)),
tol,
label = "mpls scale=TRUE: opls_gesearch vs reconstruction_error"
)
})
test_that("mpls implementations produce consistent results with scale = FALSE", {
skip_on_cran()
skip_if_not_installed("prospectr")
library(prospectr)
data(NIRsoil, package = "prospectr")
# Preprocessing
sg_det <- savitzkyGolay(
detrend(NIRsoil$spc, wav = as.numeric(colnames(NIRsoil$spc))),
m = 1, p = 1, w = 7
)
NIRsoil$spc_pr <- sg_det
# Split data
train_idx <- NIRsoil$train == 1 & !is.na(NIRsoil$Ciso)
test_idx <- NIRsoil$train == 0 & !is.na(NIRsoil$Ciso)
train_x <- NIRsoil$spc_pr[train_idx, ]
train_y <- NIRsoil$Ciso[train_idx]
test_x <- NIRsoil$spc_pr[test_idx, ]
ncomp_test <- 40
tol <- 1e-8
# Fit models
mpls_basics <- resemble:::opls_get_basics(
X = train_x,
Y = as.matrix(train_y),
ncomp = ncomp_test,
scale = FALSE,
maxiter = 100,
tol = 1e-06,
algorithm = "mpls",
xls_min_w = 3,
xls_max_w = 15
)
mpls_opls <- resemble:::opls(
X = train_x,
Y = as.matrix(train_y),
ncomp = ncomp_test,
scale = FALSE,
maxiter = 1000,
tol = 1e-06,
algorithm = "mpls",
xls_min_w = 3,
xls_max_w = 15
)
mpls_projection <- resemble:::opls_for_projection(
X = train_x,
Y = as.matrix(train_y),
ncomp = ncomp_test,
scale = FALSE,
maxiter = 1000,
tol = 1e-06,
pcSelmethod = "manual",
pcSelvalue = ncomp_test,
algorithm = "mpls",
xls_min_w = 3,
xls_max_w = 15
)
mpls_gesearch <- resemble:::opls_gesearch(
Xr = train_x,
Yr = as.matrix(train_y),
Xu = test_x,
ncomp = ncomp_test,
scale = FALSE,
response = TRUE,
reconstruction = TRUE,
similarity = TRUE,
fresponse = TRUE,
algorithm = "mpls"
)
# Predictions
mpls_basics_predict <- resemble:::predict_opls(
bo = mpls_basics$bo,
b = mpls_basics$coefficients,
ncomp = ncomp_test,
newdata = test_x,
scale = FALSE,
Xscale = mpls_basics$transf$Xscale
)
mpls_opls_predict <- resemble:::predict_opls(
bo = mpls_opls$bo,
b = mpls_opls$coefficients,
ncomp = ncomp_test,
newdata = test_x,
scale = FALSE,
Xscale = mpls_opls$transf$Xscale
)
# Reconstruction error
mpls_opls_rec <- resemble:::reconstruction_error(
x = test_x,
projection_mat = mpls_opls$projection_mat,
xloadings = mpls_opls$X_loadings,
scale = FALSE,
Xcenter = mpls_opls$transf$Xcenter,
Xscale = mpls_opls$transf$Xscale,
scale_back = TRUE
)
# Tests
expect_lt(
sum(abs(mpls_projection$weights - mpls_basics$weights)),
tol,
label = "mpls scale=FALSE: opls_for_projection vs opls_get_basics weights"
)
expect_lt(
sum(abs(mpls_basics$weights - mpls_opls$weights)),
tol,
label = "mpls scale=FALSE: opls_get_basics vs opls weights"
)
expect_lt(
sum(abs(mpls_basics_predict - mpls_opls_predict)),
tol,
label = "mpls scale=FALSE: predict_opls consistency"
)
expect_lt(
sum(abs(mpls_projection$scores - mpls_opls$scores)),
tol,
label = "mpls scale=FALSE: opls_for_projection vs opls scores"
)
expect_lt(
sum(abs(mpls_gesearch$pred_response - mpls_opls_predict[, ncomp_test])),
tol,
label = "mpls scale=FALSE: opls_gesearch vs opls predictions"
)
expect_lt(
abs(drop(mpls_gesearch$rmse_reconstruction) - drop(mpls_opls_rec)),
tol,
label = "mpls scale=FALSE: opls_gesearch vs reconstruction_error"
)
# Xscale should be all ones when scale = FALSE
expect_true(
all(mpls_basics$transf$Xscale == 1),
label = "mpls scale=FALSE: Xscale should be all ones (opls_get_basics)"
)
expect_true(
all(mpls_opls$transf$Xscale == 1),
label = "mpls scale=FALSE: Xscale should be all ones (opls)"
)
})
test_that("PLS and SIMPLS produce equivalent predictions with scale = TRUE", {
skip_on_cran()
skip_if_not_installed("prospectr")
library(prospectr)
data(NIRsoil, package = "prospectr")
# Preprocessing
sg_det <- savitzkyGolay(
detrend(NIRsoil$spc, wav = as.numeric(colnames(NIRsoil$spc))),
m = 1, p = 1, w = 7
)
NIRsoil$spc_pr <- sg_det
# Split data
train_idx <- NIRsoil$train == 1 & !is.na(NIRsoil$Ciso)
test_idx <- NIRsoil$train == 0 & !is.na(NIRsoil$Ciso)
train_x <- NIRsoil$spc_pr[train_idx, ]
train_y <- NIRsoil$Ciso[train_idx]
test_x <- NIRsoil$spc_pr[test_idx, ]
ncomp_test <- 40
tol2 <- tol <- 1e-5 # Looser tolerance for cross-algorithm comparison
tol3 <- 1e-3 # Looser tolerance for coefficients comparison
# Fit PLS (NIPALS)
pls_fit <- resemble:::opls(
X = train_x,
Y = as.matrix(train_y),
ncomp = ncomp_test,
scale = TRUE,
maxiter = 1000,
tol = 1e-06,
algorithm = "pls",
xls_min_w = 3,
xls_max_w = 15
)
# Fit SIMPLS
simpls_fit <- resemble:::opls(
X = train_x,
Y = as.matrix(train_y),
ncomp = ncomp_test,
scale = TRUE,
maxiter = 1000,
tol = 1e-06,
algorithm = "simpls",
xls_min_w = 3,
xls_max_w = 15
)
# Predictions
pls_predict <- resemble:::predict_opls(
bo = pls_fit$bo,
b = pls_fit$coefficients,
ncomp = ncomp_test,
newdata = test_x,
scale = TRUE,
Xscale = pls_fit$transf$Xscale
)
simpls_predict <- resemble:::predict_opls(
bo = simpls_fit$bo,
b = simpls_fit$coefficients,
ncomp = ncomp_test,
newdata = test_x,
scale = TRUE,
Xscale = simpls_fit$transf$Xscale
)
# PLS and SIMPLS should produce equivalent predictions
expect_lt(
sum(abs(pls_predict - simpls_predict)),
tol2,
label = "PLS vs SIMPLS scale=TRUE: predictions equivalence"
)
# Coefficients should also match
expect_lt(
sum(abs(pls_fit$coefficients - simpls_fit$coefficients)),
tol3,
label = "PLS vs SIMPLS scale=TRUE: coefficients equivalence"
)
})
test_that("PLS and SIMPLS produce equivalent predictions with scale = FALSE", {
skip_on_cran()
skip_if_not_installed("prospectr")
library(prospectr)
data(NIRsoil, package = "prospectr")
tol2 <- 1e-5
tol3 <- 1e-3
# Preprocessing
sg_det <- savitzkyGolay(
detrend(NIRsoil$spc, wav = as.numeric(colnames(NIRsoil$spc))),
m = 1, p = 1, w = 7
)
NIRsoil$spc_pr <- sg_det
# Split data
train_idx <- NIRsoil$train == 1 & !is.na(NIRsoil$Ciso)
test_idx <- NIRsoil$train == 0 & !is.na(NIRsoil$Ciso)
train_x <- NIRsoil$spc_pr[train_idx, ]
train_y <- NIRsoil$Ciso[train_idx]
test_x <- NIRsoil$spc_pr[test_idx, ]
ncomp_test <- 40
# Fit PLS (NIPALS)
pls_fit <- resemble:::opls(
X = train_x,
Y = as.matrix(train_y),
ncomp = ncomp_test,
scale = FALSE,
maxiter = 1000,
tol = 1e-06,
algorithm = "pls",
xls_min_w = 3,
xls_max_w = 15
)
# Fit SIMPLS
simpls_fit <- resemble:::opls(
X = train_x,
Y = as.matrix(train_y),
ncomp = ncomp_test,
scale = FALSE,
maxiter = 1000,
tol = 1e-06,
algorithm = "simpls",
xls_min_w = 3,
xls_max_w = 15
)
# Predictions
pls_predict <- resemble:::predict_opls(
bo = pls_fit$bo,
b = pls_fit$coefficients,
ncomp = ncomp_test,
newdata = test_x,
scale = FALSE,
Xscale = pls_fit$transf$Xscale
)
simpls_predict <- resemble:::predict_opls(
bo = simpls_fit$bo,
b = simpls_fit$coefficients,
ncomp = ncomp_test,
newdata = test_x,
scale = FALSE,
Xscale = simpls_fit$transf$Xscale
)
# PLS and SIMPLS should produce equivalent predictions
expect_lt(
sum(abs(pls_predict - simpls_predict)),
tol2,
label = "PLS vs SIMPLS scale=FALSE: predictions equivalence"
)
# Coefficients should also match
expect_lt(
sum(abs(pls_fit$coefficients - simpls_fit$coefficients)),
tol3,
label = "PLS vs SIMPLS scale=FALSE: coefficients equivalence"
)
})
test_that("PLS overfits training data with many components (scale = TRUE)", {
skip_on_cran()
skip_if_not_installed("prospectr")
library(prospectr)
data(NIRsoil, package = "prospectr")
sg_det <- savitzkyGolay(
detrend(NIRsoil$spc, wav = as.numeric(colnames(NIRsoil$spc))),
m = 0, p = 1, w = 17
)
NIRsoil$spc_pr <- sg_det
train_idx <- NIRsoil$train == 1 & !is.na(NIRsoil$Ciso)
train_x <- NIRsoil$spc_pr[train_idx, ]
train_y <- NIRsoil$Ciso[train_idx]
ncomp_test <- 200 #min(dim(train_x)) - 1
# Fit PLS (NIPALS)
pls_fit <- resemble:::opls(
X = train_x,
Y = as.matrix(train_y),
ncomp = ncomp_test,
scale = TRUE,
maxiter = 1000,
tol = 1e-06,
algorithm = "simpls",
xls_min_w = 3,
xls_max_w = 15
)
pls_pred <- resemble:::predict_opls(
bo = pls_fit$bo,
b = pls_fit$coefficients,
ncomp = ncomp_test,
newdata = train_x,
scale = TRUE,
Xscale = pls_fit$transf$Xscale
)
pred <- pls_pred[, ncomp_test]
obs <- train_y
residuals <- pred - obs
# With 100 components, training predictions should nearly perfectly match observations
# Any systematic bias or corruption would show up here
# Mean bias should be essentially zero (not shifted up or down)
expect_lt(abs(mean(residuals)), 1e-5,
label = "PLS scale=TRUE: no systematic bias")
# use relative error
expect_lt(max(abs(residuals)) / diff(range(obs)), 0.05,
label = "PLS scale=TRUE: max relative error < 5%")
# R2 should be essentially 1
expect_gt(cor(pred, obs)^2, 0.98,
label = "PLS scale=TRUE: near-perfect fit")
# Slope of pred vs obs should be ~1 (no scaling issues)
slope <- cov(pred, obs) / var(obs)
expect_lt(abs(slope - 1), 0.98,
label = "PLS scale=TRUE: slope near 1")
# Fit SIMPLS
simpls_fit <- resemble:::opls(
X = train_x,
Y = as.matrix(train_y),
ncomp = ncomp_test,
scale = TRUE,
maxiter = 1000,
tol = 1e-06,
algorithm = "simpls",
xls_min_w = 3,
xls_max_w = 15
)
simpls_pred <- resemble:::predict_opls(
bo = simpls_fit$bo,
b = simpls_fit$coefficients,
ncomp = ncomp_test,
newdata = train_x,
scale = TRUE,
Xscale = simpls_fit$transf$Xscale
)
pred_simpls <- simpls_pred[, ncomp_test]
residuals_simpls <- pred_simpls - obs
expect_lt(abs(mean(residuals_simpls)), 1e-5,
label = "SIMPLS scale=TRUE: no systematic bias")
expect_lt(max(abs(residuals_simpls)) / diff(range(obs)), 0.05,
label = "SIMPLS scale=TRUE: max relative error < 5%")
expect_gt(cor(pred_simpls, obs)^2, 0.98,
label = "SIMPLS scale=TRUE: near-perfect fit")
slope_simpls <- cov(pred_simpls, obs) / var(obs)
expect_lt(abs(slope_simpls - 1), 0.98,
label = "SIMPLS scale=TRUE: slope near 1")
# PLS and SIMPLS should be equivalent
expect_lt(max(abs(pred - pred_simpls)), 1e-5,
label = "PLS vs SIMPLS scale=TRUE: equivalent predictions")
})
test_that("PLS overfits training data with many components (scale = FALSE)", {
skip_on_cran()
skip_if_not_installed("prospectr")
library(prospectr)
data(NIRsoil, package = "prospectr")
sg_det <- savitzkyGolay(
detrend(NIRsoil$spc, wav = as.numeric(colnames(NIRsoil$spc))),
m = 1, p = 1, w = 7
)
NIRsoil$spc_pr <- sg_det
train_idx <- NIRsoil$train == 1 & !is.na(NIRsoil$Ciso)
train_x <- NIRsoil$spc_pr[train_idx, ]
train_y <- NIRsoil$Ciso[train_idx]
ncomp_test <- floor(0.75 * min(dim(train_x)) - 1)
# Fit PLS (NIPALS)
pls_fit <- resemble:::opls(
X = train_x,
Y = as.matrix(train_y),
ncomp = ncomp_test,
scale = FALSE,
maxiter = 1000,
tol = 1e-06,
algorithm = "pls",
xls_min_w = 3,
xls_max_w = 15
)
pls_pred <- resemble:::predict_opls(
bo = pls_fit$bo,
b = pls_fit$coefficients,
ncomp = ncomp_test,
newdata = train_x,
scale = FALSE,
Xscale = pls_fit$transf$Xscale
)
pred <- pls_pred[, ncomp_test]
obs <- train_y
residuals <- pred - obs
# plot(pred, obs)
# abline(0, 1)
expect_lt(abs(mean(residuals)), 1e-5,
label = "PLS scale=FALSE: no systematic bias")
expect_lt(max(abs(residuals)) / diff(range(obs)), 0.05,
label = "PLS scale=FALSE: max relative error < 5%")
expect_gt(cor(pred, obs)^2, 0.9999,
label = "PLS scale=FALSE: near-perfect fit")
slope <- cov(pred, obs) / var(obs)
expect_lt(abs(slope - 1), 1e-4,
label = "PLS scale=FALSE: slope near 1")
# Fit SIMPLS
simpls_fit <- resemble:::opls(
X = train_x,
Y = as.matrix(train_y),
ncomp = ncomp_test,
scale = FALSE,
maxiter = 1000,
tol = 1e-06,
algorithm = "simpls",
xls_min_w = 3,
xls_max_w = 15
)
simpls_pred <- resemble:::predict_opls(
bo = simpls_fit$bo,
b = simpls_fit$coefficients,
ncomp = ncomp_test,
newdata = train_x,
scale = FALSE,
Xscale = simpls_fit$transf$Xscale
)
pred_simpls <- simpls_pred[, ncomp_test]
residuals_simpls <- pred_simpls - obs
expect_lt(abs(mean(residuals_simpls)), 1e-5,
label = "SIMPLS scale=FALSE: no systematic bias")
expect_lt(max(abs(residuals)) / diff(range(obs)), 0.05,
label = "SIMPLS scale=FALSE: max relative error < 5%")
expect_gt(cor(pred_simpls, obs)^2, 0.9999,
label = "SIMPLS scale=FALSE: near-perfect fit")
slope_simpls <- cov(pred_simpls, obs) / var(obs)
expect_lt(abs(slope_simpls - 1), 1e-3,
label = "SIMPLS scale=FALSE: slope near 1")
# PLS and SIMPLS should be equivalent
expect_lt(mean(abs(pred - pred_simpls)), 0.04,
label = "PLS vs SIMPLS scale=FALSE: equivalent predictions")
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.