Nothing
# Predict function for SEMinR PLS models
#' @export
predict.seminr_model <- function(object, testData, technique = predict_DA, na.print=".", digits=3, ...){
stopifnot(inherits(object, "seminr_model"))
# Abort if received a higher-order-model or moderated model
if (!is.null(object$hoc)) {
message("There is no published solution for applying PLSpredict to higher-order-models")
return()
}
if (!is.null(object$interaction)) {
message("There is no published solution for applying PLSpredict to moderated models")
return()
}
#Extract Measurements needed for Predictions
normData <- testData[,object$mmVariables]
# Standardize data
normData[,object$mmVariables] <- standardize_data(normData[,object$mmVariables],object$meanData[object$mmVariables],object$sdData[object$mmVariables])
#Convert dataset to matrix
normData<-data.matrix(normData)
#Estimate Factor Scores from Outter Path
predicted_construct_scores <- normData%*%object$outer_weights
#Estimate Factor Scores from Inner Path and complete Matrix
predicted_construct_scores <- technique(object$smMatrix, object$path_coef, predicted_construct_scores)
#Predict Measurements with loadings
predictedMeasurements<-predicted_construct_scores%*% t(object$outer_loadings)
# Unstandardize data
predictedMeasurements[,object$mmVariables] <- unstandardize_data(predictedMeasurements[,object$mmVariables],object$meanData[object$mmVariables],object$sdData[object$mmVariables])
#Calculating the residuals
residuals <- testData[,object$mmVariables] - predictedMeasurements[,object$mmVariables]
#Prepare return Object
predictResults <- list(testData = testData[,object$mmVariables],
predicted_items = predictedMeasurements[,object$mmVariables],
item_residuals = residuals)
class(predictResults) <- "predicted_seminr_model"
return(predictResults)
}
#' Predict_pls performs either k-fold or LOOCV on a SEMinR PLS model and generates predictions
#'
#' \code{predict_pls} uses cross-validation to generate in-sample and out-sample predictions for PLS models generated by SEMinR.
#'
#' This function generates cross-validated in-sample and out-sample predictions for PLS models generated by SEMinR. The
#' cross validation technique can be k-fold if a number of folds are specified, or leave-one-out-cross-validation (LOOCV) if no folds
#' arew specified. LOOCV is recommended for small datasets.
#'
#' @param model A SEMinR model that has been estimated on the FULL dataset.
#'
#' @param technique The predictive technique to be employed, Earliest Antecedents (EA) \code{predict_EA} or
#' Direct Antecedents (DA) \code{predict_DA}
#'
#' @param noFolds The required number of folds to use in k-fold cross validation. If NULL, then parallel LOOCV will be executed.
#' Default is NULL.
#'
#' @param reps The number of times the cross-validation will be repeated. Default is NULL.
#'
#' @param cores The number of cores to use for parallel LOOCV processing. If k-fold is used, the process will not be parallelized.
#'
#' @return A list of the estimated PLS and LM prediction results:
#' \item{PLS_out_of_sample}{A matrix of the out-of-sample indicator predictions generated by the SEMinR model.}
#' \item{PLS_in_sample}{A matrix of the in-sample indicator predictions generated by the SEMinR model.}
#' \item{lm_out_of_sample}{A matrix of the out-of-sample indicator predictions generated by a linear regression model.}
#' \item{lm_in_sample}{A matrix of the in-sample indicator predictions generated by a linear regression model.}
#' \item{item_actuals}{A matrix of the actual indicator scores.}
#' \item{PLS_out_of_sample_residuals}{A matrix of the out-of-sample indicator PLS prediction residuals.}
#' \item{PLS_in_sample_residuals}{A matrix of the in-sample indicator PLS prediction residuals.}
#' \item{lm_out_of_sample_residuals}{A matrix of the out-of-sample LM indicator prediction residuals.}
#' \item{lm_in_sample_residuals}{A matrix of the in-sample LM indicator prediction residuals.}
#' \item{mmMatrix}{A Matrix of the measurement model relations.}
#' \item{smMatrix}{A Matrix of the structural model relations.}
#' \item{constructs}{A vector of the construct names.}
#' \item{mmVariables}{A vector of the indicator names.}
#' \item{outer_loadings}{The matrix of estimated indicator loadings.}
#' \item{outer_weights}{The matrix of estimated indicator weights.}
#' \item{path_coef}{The matrix of estimated structural model relationships.}
#' \item{iterations}{A numeric indicating the number of iterations required before the algorithm converged.}
#' \item{weightDiff}{A numeric indicating the minimum weight difference between iterations of the algorithm.}
#' \item{construct_scores}{A matrix of the estimated construct scores for the PLS model.}
#' \item{rSquared}{A matrix of the estimated R Squared for each construct.}
#' \item{inner_weights}{The inner weight estimation function.}
#' \item{data}{A matrix of the data upon which the model was estimated (INcluding interactions.}
#' \item{rawdata}{A matrix of the data upon which the model was estimated (EXcluding interactions.}
#' \item{measurement_model}{The SEMinR measurement model specification.}
#'
#' @usage
#'
#' predict_pls(model, technique, noFolds, reps, cores)
#'
#' @examples
#' data(mobi)
#'
#' # seminr syntax for creating measurement model
#' mobi_mm <- constructs(
#' composite("Image", multi_items("IMAG", 1:5)),
#' composite("Expectation", multi_items("CUEX", 1:3)),
#' composite("Value", multi_items("PERV", 1:2)),
#' composite("Satisfaction", multi_items("CUSA", 1:3))
#' )
#'
#' mobi_sm <- relationships(
#' paths(to = "Satisfaction",
#' from = c("Image", "Expectation", "Value"))
#' )
#'
#' mobi_pls <- estimate_pls(mobi, mobi_mm, mobi_sm)
#' cross_validated_predictions <- predict_pls(model = mobi_pls,
#' technique = predict_DA,
#' noFolds = 10,
#' cores = NULL)
#'
#' @export
predict_pls <- function(model, technique = predict_DA, noFolds = NULL, reps = NULL, cores = NULL) {
stopifnot(inherits(model, "seminr_model"))
# Abort if received a higher-order-model or moderated model
if (!is.null(model$hoc)) {
message("There is no published solution for applying PLSpredict to higher-order-models")
return()
}
if (!is.null(model$interaction)) {
message("There is no published solution for applying PLSpredict to moderated models")
return()
}
# Get endogenous item names
endogenous_items <- unlist(sapply(unique(model$smMatrix[,2]), function(x) model$mmMatrix[model$mmMatrix[, "construct"] == x,"measurement"]), use.names = FALSE)
# shuffle data
order <- sample(nrow(model$data),nrow(model$data), replace = FALSE)
ordered_data <- model$data[order,]
# collect in-sample and out-sample prediction matrices and sort everything to original row indexes
if(is.null(reps)) {
pred_matrices <- prediction_matrices( noFolds, ordered_data, model,technique, cores)
PLS_predicted_outsample_item <- pred_matrices$out_of_sample_item[as.character(c(1:nrow(model$data))),]
PLS_predicted_insample_item <- pred_matrices$in_sample_item[as.character(c(1:nrow(model$data))),]
LM_predicted_outsample_item <- pred_matrices$out_of_sample_lm_item[as.character(c(1:nrow(model$data))),]
LM_predicted_insample_item <- pred_matrices$in_sample_lm_item[as.character(c(1:nrow(model$data))),]
} else {
pls_pred_oos_array <- array(,dim = c(nrow(ordered_data), length(model$mmVariables), reps))
pls_pred_is_array <- array(,dim = c(nrow(ordered_data), length(model$mmVariables), reps))
lm_pred_oos_array <- array(,dim = c(nrow(ordered_data), length(endogenous_items), reps))
lm_pred_is_array <- array(,dim = c(nrow(ordered_data), length(endogenous_items), reps))
for (i in 1:reps) {
pred_matrices <- prediction_matrices( noFolds, ordered_data, model,technique, cores)
pls_pred_oos_array[,,i] <- pred_matrices$out_of_sample_item[as.character(c(1:nrow(model$data))),]
pls_pred_is_array[,,i] <- pred_matrices$in_sample_item[as.character(c(1:nrow(model$data))),]
lm_pred_oos_array[,,i] <- pred_matrices$out_of_sample_lm_item[as.character(c(1:nrow(model$data))),]
lm_pred_is_array[,,i] <- pred_matrices$in_sample_lm_item[as.character(c(1:nrow(model$data))),]
}
PLS_predicted_outsample_item <- apply(pls_pred_oos_array,c(1,2),mean)
PLS_predicted_insample_item <- apply(pls_pred_is_array,c(1,2),mean)
LM_predicted_outsample_item <- apply(lm_pred_oos_array,c(1,2),mean)
LM_predicted_insample_item <- apply(lm_pred_is_array,c(1,2),mean)
colnames(PLS_predicted_outsample_item) <- model$mmVariables
colnames(PLS_predicted_insample_item) <- model$mmVariables
colnames(LM_predicted_outsample_item) <- endogenous_items
colnames(LM_predicted_insample_item) <- endogenous_items
}
# Allocate results
results <- list(PLS_out_of_sample = PLS_predicted_outsample_item[,endogenous_items],
PLS_in_sample = PLS_predicted_insample_item[,endogenous_items],
lm_out_of_sample = LM_predicted_outsample_item,
lm_in_sample = LM_predicted_insample_item,
item_actuals = ordered_data[as.character(c(1:nrow(model$data))),model$mmVariables],
PLS_out_of_sample_residuals = (ordered_data[as.character(c(1:nrow(model$data))),endogenous_items] - PLS_predicted_outsample_item[,endogenous_items]),
PLS_in_sample_residuals = (ordered_data[as.character(c(1:nrow(model$data))),endogenous_items] - PLS_predicted_insample_item[,endogenous_items]),
lm_out_of_sample_residuals = (ordered_data[as.character(c(1:nrow(model$data))),endogenous_items] - LM_predicted_outsample_item),
lm_in_sample_residuals = (ordered_data[as.character(c(1:nrow(model$data))),endogenous_items] - LM_predicted_insample_item))
class(results) <- "predict_pls_model"
return(results)
}
# Function to calculate item metrics
item_metrics <- function(pls_prediction_kfold) {
# Genereate IS PLS metrics
PLS_item_prediction_metrics_IS <- convert_to_table_output(
apply(pls_prediction_kfold$PLS_in_sample_residuals, 2, prediction_metrics))
# Generate OOS PLS metrics
PLS_item_prediction_metrics_OOS <- convert_to_table_output(
apply(pls_prediction_kfold$PLS_out_of_sample_residuals, 2, prediction_metrics))
# Generate IS LM metrics
LM_item_prediction_metrics_IS <- convert_to_table_output(
apply(pls_prediction_kfold$lm_in_sample_residuals, 2, prediction_metrics))
# Generate OOS LM metrics
LM_item_prediction_metrics_OOS <- convert_to_table_output(
apply(pls_prediction_kfold$lm_out_of_sample_residuals, 2, prediction_metrics))
# Assign rownames to matrices
rownames(PLS_item_prediction_metrics_IS) <- rownames(PLS_item_prediction_metrics_OOS) <- rownames(LM_item_prediction_metrics_OOS) <- c("RMSE","MAE")
rownames(LM_item_prediction_metrics_OOS) <- rownames(LM_item_prediction_metrics_IS) <- c("RMSE","MAE")
return(list(PLS_item_prediction_metrics_IS = PLS_item_prediction_metrics_IS,
PLS_item_prediction_metrics_OOS = PLS_item_prediction_metrics_OOS,
LM_item_prediction_metrics_IS = LM_item_prediction_metrics_IS,
LM_item_prediction_metrics_OOS = LM_item_prediction_metrics_OOS))
}
# Check these lib functions ----
# function to subset a smMatrix by construct(x)
subset_by_construct <- function(x, smMatrix) {
smMatrix[smMatrix[,"source"] == c(x), "target"]
}
# Function to check whether a named construct's antecedents occur in a list
construct_antecedent_in_list <- function(x,list, smMatrix) {
all(smMatrix[smMatrix[,"target"]==x,"source"] %in% list)
}
# Function to iterate over a vector of constructs and return the antecedents each construct depends on
depends_on <- function(constructs_vector, smMatrix) {
return(unique(unlist(sapply(constructs_vector, subset_by_construct, smMatrix = smMatrix), use.names = FALSE)))
}
# Function to iterate over a vector of constructs and check whether their antecedents occur in a list
antecedents_in_list <- function(constructs_vector, list,smMatrix) {
as.logical(sapply(constructs_vector, construct_antecedent_in_list, list = list, smMatrix = smMatrix))
}
# Function to organize order of endogenous constructs from most exogenous forwards
construct_order <- function(smMatrix) {
# get purely endogenous and purely exogenous
only_endogenous <- setdiff(unique(smMatrix[,2]), unique(smMatrix[,1]))
only_exogenous <- setdiff(unique(smMatrix[,1]), unique(smMatrix[,2]))
# get construct names
construct_names <- unique(c(smMatrix[,1],smMatrix[,2]))
# get all exogenous constructs
all_exogenous_constructs <- setdiff(construct_names, only_endogenous)
# initialize construct order with first purely exogenous construct
construct_order <- only_exogenous
# Iterate over constructs to generate construct_order
while (!setequal(all_exogenous_constructs,construct_order)) {
construct_order <- c(construct_order,setdiff(depends_on(construct_order,smMatrix)[antecedents_in_list(depends_on(construct_order,smMatrix), construct_order, smMatrix)], construct_order))
}
# return the order of endogenous constructs to be predicted
final_list <- setdiff(construct_order,only_exogenous)
return(c(final_list,only_endogenous))
}
# Function to standardize a matrix by sd vector and mean vector
standardize_data <- function(data_matrix,means_vector,sd_vector) {
return(t(t(sweep(data_matrix,2,means_vector)) / sd_vector))
}
# Function to un-standardize a matrix by sd vector and mean vector
unstandardize_data <- function(data_matrix,means_vector,sd_vector) {
return(sweep((data_matrix %*% diag(sd_vector)),2,means_vector,"+"))
}
#$ Function to sum rows of a matrix
sum_rows <- function(x, matrix, noFolds, constructs) {
return(rowSums(matrix[,(0:(noFolds-1)*length(constructs))+x]))
}
#$ Function to mean rows of a matrix
mean_rows <- function(x, matrix, noFolds, constructs) {
return(rowSums(matrix[,(0:(noFolds-1)*length(constructs))+x])/(noFolds-1))
}
#### Check ----
# Function to return train and test predictions for a model
in_and_out_sample_predictions <- function(x, folds, ordered_data, model,technique) {
testIndexes <- which(folds==x,arr.ind=TRUE)
trainIndexes <- which(folds!=x,arr.ind=TRUE)
testingData <- ordered_data[testIndexes, ]
trainingData <- ordered_data[-testIndexes, ]
# Create matrices for return data
PLS_predicted_outsample_construct <- matrix(0,nrow = nrow(ordered_data),ncol = length(model$constructs),dimnames = list(rownames(ordered_data),model$constructs))
PLS_predicted_insample_construct <- matrix(0,nrow = nrow(ordered_data),ncol = length(model$constructs),dimnames = list(rownames(ordered_data),model$constructs))
PLS_predicted_outsample_item <- matrix(0,nrow = nrow(ordered_data),ncol = length(model$mmVariables),dimnames = list(rownames(ordered_data),model$mmVariables))
PLS_predicted_insample_item <- matrix(0,nrow = nrow(ordered_data),ncol = length(model$mmVariables),dimnames = list(rownames(ordered_data),model$mmVariables))
PLS_predicted_insample_item_residuals <- matrix(0,nrow = nrow(ordered_data),ncol = length(model$mmVariables),dimnames = list(rownames(ordered_data),model$mmVariables))
#PLS prediction on testset model
suppressMessages(train_model <- seminr::estimate_pls(data = trainingData,
measurement_model = model$measurement_model,
structural_model = model$smMatrix,
inner_weights = model$inner_weights,
missing = model$settings$missing,
missing_value = model$settings$missing_value,
maxIt = model$settings$maxIt,
stopCriterion = model$settings$stopCriterion))
test_predictions <- stats::predict(object = train_model,
testData = testingData,
technique = technique)
PLS_predicted_outsample_item[testIndexes,] <- test_predictions$predicted_items
#PLS prediction on trainset model
train_predictions <- stats::predict(object = train_model,
testData = trainingData,
technique = technique)
PLS_predicted_insample_item[trainIndexes,] <- train_predictions$predicted_items
PLS_predicted_insample_item_residuals[trainIndexes,] <- as.matrix(train_predictions$item_residuals)
## Perform prediction on LM models for benchmark
# Identify endogenous items
endogenous_items <- unlist(sapply(unique(model$smMatrix[,2]), function(x) model$mmMatrix[model$mmMatrix[, "construct"] == x,"measurement"]), use.names = FALSE)
#LM Matrices
lm_holder <- sapply(unique(model$smMatrix[,2]), generate_lm_predictions, model = model,
ordered_data = ordered_data[,model$mmVariables],
testIndexes = testIndexes,
endogenous_items = endogenous_items,
trainIndexes = trainIndexes)
lmprediction_in_sample <- matrix(0, ncol = 0 , nrow = length(trainIndexes))
lmprediction_out_sample <- matrix(0, ncol = 0 , nrow = length(testIndexes))
lmprediction_in_sample_residuals <- matrix(0,nrow=nrow(ordered_data),ncol=length(endogenous_items),byrow =TRUE,dimnames = list(rownames(ordered_data),endogenous_items))
# collect the odd and even numbered matrices from the matrices return object
lmprediction_in_sample <- do.call(cbind, lm_holder[((1:(length(unique(model$smMatrix[,2]))*2))[1:(length(unique(model$smMatrix[,2]))*2)%%2==1])])
lmprediction_out_sample <- do.call(cbind, lm_holder[((1:(length(unique(model$smMatrix[,2]))*2))[1:(length(unique(model$smMatrix[,2]))*2)%%2==0])])
lmprediction_in_sample_residuals[trainIndexes,] <- as.matrix(ordered_data[trainIndexes,endogenous_items]) - lmprediction_in_sample[trainIndexes,endogenous_items]
return(list(PLS_predicted_insample_item = PLS_predicted_insample_item,
PLS_predicted_outsample_item = PLS_predicted_outsample_item,
LM_predicted_insample_item = lmprediction_in_sample,
LM_predicted_outsample_item = lmprediction_out_sample,
PLS_predicted_insample_item_residuals = PLS_predicted_insample_item_residuals,
LM_predicted_insample_item_residuals = lmprediction_in_sample_residuals))
}
# Function to collect and parse prediction matrices
prediction_matrices <- function(noFolds, ordered_data, model,technique, cores) {
out <- tryCatch(
{
# If noFolds is NULL, perform parallel LOOCV
if (is.null(noFolds)) {
# Automatically perform LOOCV if number of folds not specified
noFolds = nrow(model$data)
#Create noFolds equally sized folds
folds <- cut(seq(1,nrow(ordered_data)),breaks=noFolds,labels=FALSE)
# Create cluster
suppressWarnings(ifelse(is.null(cores), cl <- parallel::makeCluster(parallel::detectCores()), cl <- parallel::makeCluster(cores)))
#generate_lm_predictions <- PLSpredict:::generate_lm_predictions
#predict_lm_matrices <- PLSpredict:::predict_lm_matrices
# Export variables and functions to cluster
parallel::clusterExport(cl=cl, varlist=c("generate_lm_predictions",
"predict_lm_matrices"), envir=environment())
# Execute the bootstrap
utils::capture.output(matrices <- parallel::parSapply(cl,1:noFolds,in_and_out_sample_predictions,folds = folds,
ordered_data = ordered_data,
model = model,
technique = technique))
# Stop cluster
parallel::stopCluster(cl)
} else {
#Create noFolds equally sized folds
folds <- cut(seq(1,nrow(ordered_data)),breaks=noFolds,labels=FALSE)
matrices <- sapply(1:noFolds, in_and_out_sample_predictions, folds = folds,ordered_data = ordered_data, model = model, technique = technique)
}
# collect the odd and even numbered matrices from the matrices return object
in_sample_item_matrix <- do.call(cbind, matrices[(1:(noFolds*6))[1:(noFolds*6)%%6==1]])
out_sample_item_matrix <- do.call(cbind, matrices[(1:(noFolds*6))[1:(noFolds*6)%%6==2]])
in_sample_lm_matrix <- do.call(cbind, matrices[(1:(noFolds*6))[1:(noFolds*6)%%6==3]])
out_sample_lm_matrix <- do.call(cbind, matrices[(1:(noFolds*6))[1:(noFolds*6)%%6==4]])
PLS_in_sample_item_residuals <- do.call(cbind, matrices[(1:(noFolds*6))[1:(noFolds*6)%%6==5]])
LM_in_sample_item_residuals <- do.call(cbind, matrices[(1:(noFolds*6))[1:(noFolds*6)%%6==0]])
# mean the in-sample item predictions by row
average_insample_item <- sapply(1:length(model$mmVariables), mean_rows, matrix = in_sample_item_matrix,
noFolds = noFolds,
constructs = model$mmVariables)
# sum the out-sample item predictions by row
average_outsample_item <- sapply(1:length(model$mmVariables), sum_rows, matrix = out_sample_item_matrix,
noFolds = noFolds,
constructs = model$mmVariables)
# square the out-sample pls residuals, mean them and take the square root
average_insample_pls_item_residuals <- sqrt(sapply(1:length(model$mmVariables), mean_rows, matrix = PLS_in_sample_item_residuals^2,
noFolds = noFolds,
constructs = model$mmVariables))
# Collect endogenous items
endogenous_items <- unlist(sapply(unique(model$smMatrix[,2]), function(x) model$mmMatrix[model$mmMatrix[, "construct"] == x,"measurement"]), use.names = FALSE)
# mean the in-sample lm predictions by row
average_insample_lm <- sapply(1:length(endogenous_items), mean_rows, matrix = in_sample_lm_matrix,
noFolds = noFolds,
constructs = endogenous_items)
# sum the out-sample item predictions by row
average_outsample_lm <- sapply(1:length(endogenous_items), sum_rows, matrix = out_sample_lm_matrix,
noFolds = noFolds,
constructs = endogenous_items)
# square the out-sample lm residuals, mean them, and take square root
average_insample_lm_item_residuals <- sqrt(sapply(1:length(endogenous_items), mean_rows, matrix = LM_in_sample_item_residuals^2,
noFolds = noFolds,
constructs = endogenous_items))
colnames(average_insample_item) <- colnames(average_insample_pls_item_residuals) <- colnames(average_outsample_item) <- model$mmVariables
colnames(average_insample_lm) <- colnames(average_outsample_lm) <- colnames(average_insample_lm_item_residuals) <- endogenous_items
return(list(out_of_sample_item = average_outsample_item,
in_sample_item = average_insample_item,
out_of_sample_lm_item = average_outsample_lm,
in_sample_lm_item = average_insample_lm,
pls_in_sample_item_residuals = average_insample_pls_item_residuals,
lm_in_sample_item_residuals = average_insample_lm_item_residuals))
},
error=function(cond) {
message("Parallel encountered this ERROR: ")
message(cond)
parallel::stopCluster(cl)
return(NULL)
},
warning=function(cond) {
message("Parallel encountered this WARNING:")
message(cond)
parallel::stopCluster(cl)
return(NULL)
},
finally={
#
}
)
}
# Function to return the RMSE and MAE of a score
prediction_metrics <- function(residuals) {
RMSE <- sqrt(mean(residuals^2))
MAE <- mean(abs(residuals))
return(matrix(c(RMSE,MAE), nrow = 2, ncol = 1, byrow = TRUE))
}
predict_lm_matrices <- function(x, depTrainData, indepTrainData,indepTestData, endogenous_items) {
# Train LM
trainLM <- stats::lm(depTrainData[,x] ~ ., indepTrainData)
# Predict out of sample
lmprediction_out_sample <- stats::predict(trainLM, newdata = indepTestData)
# Predict in sample
lmprediction_in_sample <- stats::predict(trainLM, newdata = indepTrainData)
return(list(lm_prediction_in_sample = lmprediction_in_sample,
lm_prediction_out_sample = lmprediction_out_sample))
}
generate_lm_predictions <- function(x, model, ordered_data, testIndexes, endogenous_items, trainIndexes) {
# Extract the target and non-target variables for Linear Model
dependant_items <- model$mmMatrix[model$mmMatrix[,1] == x,2]
# Create matrix return object holders
in_sample_matrix <- matrix(0,nrow = nrow(ordered_data), ncol = length(dependant_items), dimnames = list(rownames(ordered_data),dependant_items))
out_sample_matrix <- matrix(0,nrow = nrow(ordered_data), ncol = length(dependant_items), dimnames = list(rownames(ordered_data),dependant_items))
# Exclude dependant items from independant matrix
independant_matrix <- ordered_data[ , -which(names(ordered_data) %in% dependant_items)]
dependant_matrix <- as.matrix(ordered_data[,dependant_items])
# Create independant items matrices - training and testing
indepTestData <- independant_matrix[testIndexes, ]
indepTrainData <- independant_matrix[-testIndexes, ]
# Create dependant matrices - training and testing
if (length(testIndexes) == 1) {
depTestData <- t(as.matrix(dependant_matrix[testIndexes, ]))
} else {
depTestData <- as.matrix(dependant_matrix[testIndexes, ])
}
#depTestData <- as.matrix(dependant_matrix[testIndexes, ])
depTrainData <- as.matrix(dependant_matrix[-testIndexes, ])
colnames(depTrainData) <- colnames(depTestData) <- dependant_items
lm_prediction_list <- sapply(dependant_items, predict_lm_matrices, depTrainData = depTrainData,
indepTrainData = indepTrainData,
indepTestData = indepTestData,
endogenous_items = endogenous_items)
in_sample_matrix[trainIndexes,] <- matrix(unlist(lm_prediction_list[(1:length(lm_prediction_list))[1:length(lm_prediction_list)%%2==1]]), ncol = length(dependant_items), nrow = nrow(depTrainData), dimnames = list(rownames(depTrainData),dependant_items))
out_sample_matrix[testIndexes,] <- matrix(unlist(lm_prediction_list[(1:length(lm_prediction_list))[1:length(lm_prediction_list)%%2==0]]), ncol = length(dependant_items), nrow = nrow(depTestData), dimnames = list(rownames(depTestData),dependant_items))
return(list(in_sample_matrix, out_sample_matrix))
}
#' Predictive Scheme
#'
#' \code{predict_EA} and \code{predict_DA} specify the predictive scheme to be used in the generation of the
#' predictions. EA refers to Earliest Antecedents nad DA to Direct Antecedents.
#'
#' @param smMatrix is the \code{structural_model} - a source-to-target matrix representing the inner/structural model,
#' generated by \code{relationships} generated by SEMinR.
#'
#' @param path_coef is the Path Coefficients matrix from a SEMinR model.
#'
#' @param construct_scores is the matrix of construct scores generated by SEMinR.
#'
#' @usage
#' predict_EA(smMatrix, path_coef, construct_scores)
#'
#' @export
predict_EA <- function(smMatrix, path_coef, construct_scores) {
order <- construct_order(smMatrix)
only_exogenous <- setdiff(unique(smMatrix[,1]), unique(smMatrix[,2]))
return_matrix <- construct_scores
return_matrix[,order] <- 0
for (construct in order) {
return_matrix[,construct] <- return_matrix %*% path_coef[,construct]
}
return(return_matrix)
}
#' Predictive Scheme
#'
#' \code{predict_EA} and \code{predict_DA} specify the predictive scheme to be used in the generation of the
#' predictions. EA refers to Earliest Antecedents nad DA to Direct Antecedents.
#'
#' @param smMatrix is the \code{structural_model} - a source-to-target matrix representing the inner/structural model,
#' generated by \code{relationships} generated by SEMinR.
#'
#' @param path_coef is the Path Coefficients matrix from a SEMinR model.
#'
#' @param construct_scores is the matrix of construct scores generated by SEMinR.
#'
#' @usage
#' predict_DA(smMatrix, path_coef, construct_scores)
#'
#' @export
predict_DA <- function(smMatrix, path_coef, construct_scores) {
only_exogenous <- setdiff(unique(smMatrix[,1]), unique(smMatrix[,2]))
return_matrix <- construct_scores%*%path_coef
return_matrix[,only_exogenous] <- construct_scores[,only_exogenous]
return(return_matrix)
}
return_predict_error <- function(object, indicator) {
object$prediction_error[,indicator]
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.