R/feature_plspredict.R

Defines functions return_predict_error predict_DA predict_EA generate_lm_predictions predict_lm_matrices prediction_metrics prediction_matrices in_and_out_sample_predictions mean_rows sum_rows unstandardize_data standardize_data construct_order antecedents_in_list depends_on construct_antecedent_in_list subset_by_construct item_metrics predict_pls predict.seminr_model

Documented in predict_DA predict_EA predict_pls

# Predict function for SEMinR PLS models
#' @export
predict.seminr_model <- function(object, testData, technique = predict_DA, na.print=".", digits=3, ...){
  stopifnot(inherits(object, "seminr_model"))

  # Abort if received a higher-order-model or moderated model
  if (!is.null(object$hoc)) {
    message("There is no published solution for applying PLSpredict to higher-order-models")
    return()
  }
  if (!is.null(object$interaction)) {
    message("There is no published solution for applying PLSpredict to moderated models")
    return()
  }

  #Extract Measurements needed for Predictions
  normData <- testData[,object$mmVariables]

  # Standardize data
  normData[,object$mmVariables] <- standardize_data(normData[,object$mmVariables],object$meanData[object$mmVariables],object$sdData[object$mmVariables])

  #Convert dataset to matrix
  normData<-data.matrix(normData)

  #Estimate Factor Scores from Outter Path
  predicted_construct_scores <- normData%*%object$outer_weights

  #Estimate Factor Scores from Inner Path and complete Matrix
  predicted_construct_scores <- technique(object$smMatrix, object$path_coef, predicted_construct_scores)

  #Predict Measurements with loadings
  predictedMeasurements<-predicted_construct_scores%*% t(object$outer_loadings)

  # Unstandardize data
  predictedMeasurements[,object$mmVariables] <- unstandardize_data(predictedMeasurements[,object$mmVariables],object$meanData[object$mmVariables],object$sdData[object$mmVariables])

  #Calculating the residuals
  residuals <- testData[,object$mmVariables] - predictedMeasurements[,object$mmVariables]

  #Prepare return Object
  predictResults <- list(testData = testData[,object$mmVariables],
                         predicted_items = predictedMeasurements[,object$mmVariables],
                         item_residuals = residuals)

  class(predictResults) <- "predicted_seminr_model"
  return(predictResults)
}

#' Predict_pls performs either k-fold or LOOCV on a SEMinR PLS model and generates predictions
#'
#' \code{predict_pls} uses cross-validation to generate in-sample and out-sample predictions for PLS models generated by SEMinR.
#'
#' This function generates cross-validated in-sample and out-sample predictions for PLS models generated by SEMinR. The
#' cross validation technique can be k-fold if a number of folds are specified, or leave-one-out-cross-validation (LOOCV) if no folds
#' arew specified. LOOCV is recommended for small datasets.
#'
#' @param model A SEMinR model that has been estimated on the FULL dataset.
#'
#' @param technique The predictive technique to be employed, Earliest Antecedents (EA) \code{predict_EA} or
#' Direct Antecedents (DA) \code{predict_DA}
#'
#' @param noFolds The required number of folds to use in k-fold cross validation. If NULL, then parallel LOOCV will be executed.
#' Default is NULL.
#'
#' @param reps The number of times the cross-validation will be repeated. Default is NULL.
#'
#' @param cores The number of cores to use for parallel LOOCV processing. If k-fold is used, the process will not be parallelized.
#'
#' @return A list of the estimated PLS and LM prediction results:
#'  \item{PLS_out_of_sample}{A matrix of the out-of-sample indicator predictions generated by the SEMinR model.}
#'  \item{PLS_in_sample}{A matrix of the in-sample indicator predictions generated by the SEMinR model.}
#'  \item{lm_out_of_sample}{A matrix of the out-of-sample indicator predictions generated by a linear regression model.}
#'  \item{lm_in_sample}{A matrix of the in-sample indicator predictions generated by a linear regression model.}
#'  \item{item_actuals}{A matrix of the actual indicator scores.}
#'  \item{PLS_out_of_sample_residuals}{A matrix of the out-of-sample indicator PLS prediction residuals.}
#'  \item{PLS_in_sample_residuals}{A matrix of the in-sample indicator PLS prediction residuals.}
#'  \item{lm_out_of_sample_residuals}{A matrix of the out-of-sample LM indicator prediction residuals.}
#'  \item{lm_in_sample_residuals}{A matrix of the in-sample LM indicator prediction residuals.}
#'  \item{mmMatrix}{A Matrix of the measurement model relations.}
#'  \item{smMatrix}{A Matrix of the structural model relations.}
#'  \item{constructs}{A vector of the construct names.}
#'  \item{mmVariables}{A vector of the indicator names.}
#'  \item{outer_loadings}{The matrix of estimated indicator loadings.}
#'  \item{outer_weights}{The matrix of estimated indicator weights.}
#'  \item{path_coef}{The matrix of estimated structural model relationships.}
#'  \item{iterations}{A numeric indicating the number of iterations required before the algorithm converged.}
#'  \item{weightDiff}{A numeric indicating the minimum weight difference between iterations of the algorithm.}
#'  \item{construct_scores}{A matrix of the estimated construct scores for the PLS model.}
#'  \item{rSquared}{A matrix of the estimated R Squared for each construct.}
#'  \item{inner_weights}{The inner weight estimation function.}
#'  \item{data}{A matrix of the data upon which the model was estimated (INcluding interactions.}
#'  \item{rawdata}{A matrix of the data upon which the model was estimated (EXcluding interactions.}
#'  \item{measurement_model}{The SEMinR measurement model specification.}
#'
#' @usage
#'
#' predict_pls(model, technique, noFolds, reps, cores)
#'
#' @examples
#' data(mobi)
#'
#' # seminr syntax for creating measurement model
#' mobi_mm <- constructs(
#'   composite("Image",        multi_items("IMAG", 1:5)),
#'   composite("Expectation",  multi_items("CUEX", 1:3)),
#'   composite("Value",        multi_items("PERV", 1:2)),
#'   composite("Satisfaction", multi_items("CUSA", 1:3))
#' )
#'
#' mobi_sm <- relationships(
#'   paths(to = "Satisfaction",
#'         from = c("Image", "Expectation", "Value"))
#' )
#'
#' mobi_pls <- estimate_pls(mobi, mobi_mm, mobi_sm)
#' cross_validated_predictions <- predict_pls(model = mobi_pls,
#'                                            technique = predict_DA,
#'                                            noFolds = 10,
#'                                            cores = NULL)
#'
#' @export
predict_pls <- function(model, technique = predict_DA, noFolds = NULL, reps = NULL, cores = NULL) {

  stopifnot(inherits(model, "seminr_model"))
  # Abort if received a higher-order-model or moderated model
  if (!is.null(model$hoc)) {
    message("There is no published solution for applying PLSpredict to higher-order-models")
    return()
  }
  if (!is.null(model$interaction)) {
    message("There is no published solution for applying PLSpredict to moderated models")
    return()
  }
  # Get endogenous item names
  endogenous_items <- unlist(sapply(unique(model$smMatrix[,2]), function(x) model$mmMatrix[model$mmMatrix[, "construct"] == x,"measurement"]), use.names = FALSE)

  # shuffle data
  order <- sample(nrow(model$data),nrow(model$data), replace = FALSE)
  ordered_data <- model$data[order,]

  # collect in-sample and out-sample prediction matrices and sort everything to original row indexes
  if(is.null(reps)) {
    pred_matrices <- prediction_matrices( noFolds, ordered_data, model,technique, cores)
    PLS_predicted_outsample_item <- pred_matrices$out_of_sample_item[as.character(c(1:nrow(model$data))),]
    PLS_predicted_insample_item <- pred_matrices$in_sample_item[as.character(c(1:nrow(model$data))),]
    LM_predicted_outsample_item <- pred_matrices$out_of_sample_lm_item[as.character(c(1:nrow(model$data))),]
    LM_predicted_insample_item <- pred_matrices$in_sample_lm_item[as.character(c(1:nrow(model$data))),]
  } else {
    pls_pred_oos_array <- array(,dim = c(nrow(ordered_data), length(model$mmVariables), reps))
    pls_pred_is_array <- array(,dim = c(nrow(ordered_data), length(model$mmVariables), reps))
    lm_pred_oos_array <- array(,dim = c(nrow(ordered_data), length(endogenous_items), reps))
    lm_pred_is_array <- array(,dim = c(nrow(ordered_data), length(endogenous_items), reps))
    for (i in 1:reps) {
      pred_matrices <- prediction_matrices( noFolds, ordered_data, model,technique, cores)
      pls_pred_oos_array[,,i] <- pred_matrices$out_of_sample_item[as.character(c(1:nrow(model$data))),]
      pls_pred_is_array[,,i] <- pred_matrices$in_sample_item[as.character(c(1:nrow(model$data))),]
      lm_pred_oos_array[,,i] <- pred_matrices$out_of_sample_lm_item[as.character(c(1:nrow(model$data))),]
      lm_pred_is_array[,,i] <- pred_matrices$in_sample_lm_item[as.character(c(1:nrow(model$data))),]
    }
    PLS_predicted_outsample_item <- apply(pls_pred_oos_array,c(1,2),mean)
    PLS_predicted_insample_item <- apply(pls_pred_is_array,c(1,2),mean)
    LM_predicted_outsample_item <- apply(lm_pred_oos_array,c(1,2),mean)
    LM_predicted_insample_item <- apply(lm_pred_is_array,c(1,2),mean)
    colnames(PLS_predicted_outsample_item) <- model$mmVariables
    colnames(PLS_predicted_insample_item) <- model$mmVariables
    colnames(LM_predicted_outsample_item) <- endogenous_items
    colnames(LM_predicted_insample_item) <- endogenous_items
  }


  # Allocate results
  results <- list(PLS_out_of_sample = PLS_predicted_outsample_item[,endogenous_items],
                  PLS_in_sample = PLS_predicted_insample_item[,endogenous_items],
                  lm_out_of_sample = LM_predicted_outsample_item,
                  lm_in_sample = LM_predicted_insample_item,
                  item_actuals = ordered_data[as.character(c(1:nrow(model$data))),model$mmVariables],
                  PLS_out_of_sample_residuals = (ordered_data[as.character(c(1:nrow(model$data))),endogenous_items] - PLS_predicted_outsample_item[,endogenous_items]),
                  PLS_in_sample_residuals = (ordered_data[as.character(c(1:nrow(model$data))),endogenous_items] - PLS_predicted_insample_item[,endogenous_items]),
                  lm_out_of_sample_residuals = (ordered_data[as.character(c(1:nrow(model$data))),endogenous_items] - LM_predicted_outsample_item),
                  lm_in_sample_residuals = (ordered_data[as.character(c(1:nrow(model$data))),endogenous_items] - LM_predicted_insample_item))
  class(results) <- "predict_pls_model"
  return(results)
}

# Function to calculate item metrics
item_metrics <- function(pls_prediction_kfold) {

  # Genereate IS PLS metrics
  PLS_item_prediction_metrics_IS <- convert_to_table_output(
    apply(pls_prediction_kfold$PLS_in_sample_residuals, 2, prediction_metrics))

  # Generate OOS PLS metrics
  PLS_item_prediction_metrics_OOS <- convert_to_table_output(
    apply(pls_prediction_kfold$PLS_out_of_sample_residuals, 2, prediction_metrics))

  # Generate IS LM metrics
  LM_item_prediction_metrics_IS <- convert_to_table_output(
    apply(pls_prediction_kfold$lm_in_sample_residuals, 2, prediction_metrics))

  # Generate OOS LM metrics
  LM_item_prediction_metrics_OOS <- convert_to_table_output(
    apply(pls_prediction_kfold$lm_out_of_sample_residuals, 2, prediction_metrics))

  # Assign rownames to matrices
  rownames(PLS_item_prediction_metrics_IS) <- rownames(PLS_item_prediction_metrics_OOS) <- rownames(LM_item_prediction_metrics_OOS) <- c("RMSE","MAE")
  rownames(LM_item_prediction_metrics_OOS) <- rownames(LM_item_prediction_metrics_IS) <- c("RMSE","MAE")

  return(list(PLS_item_prediction_metrics_IS = PLS_item_prediction_metrics_IS,
              PLS_item_prediction_metrics_OOS = PLS_item_prediction_metrics_OOS,
              LM_item_prediction_metrics_IS = LM_item_prediction_metrics_IS,
              LM_item_prediction_metrics_OOS = LM_item_prediction_metrics_OOS))
}

# Check these lib functions ----
# function to subset a smMatrix by construct(x)
subset_by_construct <- function(x, smMatrix) {
  smMatrix[smMatrix[,"source"] == c(x), "target"]
}

# Function to check whether a named construct's antecedents occur in a list
construct_antecedent_in_list <- function(x,list, smMatrix) {
  all(smMatrix[smMatrix[,"target"]==x,"source"] %in% list)
}

# Function to iterate over a vector of constructs and return the antecedents each construct depends on
depends_on <- function(constructs_vector, smMatrix) {
  return(unique(unlist(sapply(constructs_vector, subset_by_construct, smMatrix = smMatrix), use.names = FALSE)))
}

# Function to iterate over a vector of constructs and check whether their antecedents occur in a list
antecedents_in_list <- function(constructs_vector, list,smMatrix) {
  as.logical(sapply(constructs_vector, construct_antecedent_in_list, list = list, smMatrix = smMatrix))
}

# Function to organize order of endogenous constructs from most exogenous forwards
construct_order <- function(smMatrix) {

  # get purely endogenous and purely exogenous
  only_endogenous <- setdiff(unique(smMatrix[,2]), unique(smMatrix[,1]))
  only_exogenous <- setdiff(unique(smMatrix[,1]), unique(smMatrix[,2]))

  # get construct names
  construct_names <- unique(c(smMatrix[,1],smMatrix[,2]))

  # get all exogenous constructs
  all_exogenous_constructs <- setdiff(construct_names, only_endogenous)

  # initialize construct order with first purely exogenous construct
  construct_order <- only_exogenous

  # Iterate over constructs to generate construct_order
  while (!setequal(all_exogenous_constructs,construct_order)) {
    construct_order <- c(construct_order,setdiff(depends_on(construct_order,smMatrix)[antecedents_in_list(depends_on(construct_order,smMatrix), construct_order, smMatrix)], construct_order))
  }

  # return the order of endogenous constructs to be predicted
  final_list <- setdiff(construct_order,only_exogenous)
  return(c(final_list,only_endogenous))

}

# Function to standardize a matrix by sd vector and mean vector
standardize_data <- function(data_matrix,means_vector,sd_vector) {
  return(t(t(sweep(data_matrix,2,means_vector)) / sd_vector))
}

# Function to un-standardize a matrix by sd vector and mean vector
unstandardize_data <- function(data_matrix,means_vector,sd_vector) {
  return(sweep((data_matrix %*% diag(sd_vector)),2,means_vector,"+"))
}

#$ Function to sum rows of a matrix
sum_rows <- function(x, matrix, noFolds, constructs) {
  return(rowSums(matrix[,(0:(noFolds-1)*length(constructs))+x]))
}

#$ Function to mean rows of a matrix
mean_rows <- function(x, matrix, noFolds, constructs) {
  return(rowSums(matrix[,(0:(noFolds-1)*length(constructs))+x])/(noFolds-1))
}

#### Check ----

# Function to return train and test predictions for a model
in_and_out_sample_predictions <- function(x, folds, ordered_data, model,technique) {
  testIndexes <- which(folds==x,arr.ind=TRUE)
  trainIndexes <- which(folds!=x,arr.ind=TRUE)
  testingData <- ordered_data[testIndexes, ]
  trainingData <- ordered_data[-testIndexes, ]

  # Create matrices for return data
  PLS_predicted_outsample_construct <- matrix(0,nrow = nrow(ordered_data),ncol = length(model$constructs),dimnames = list(rownames(ordered_data),model$constructs))
  PLS_predicted_insample_construct <- matrix(0,nrow = nrow(ordered_data),ncol = length(model$constructs),dimnames = list(rownames(ordered_data),model$constructs))
  PLS_predicted_outsample_item <- matrix(0,nrow = nrow(ordered_data),ncol = length(model$mmVariables),dimnames = list(rownames(ordered_data),model$mmVariables))
  PLS_predicted_insample_item <- matrix(0,nrow = nrow(ordered_data),ncol = length(model$mmVariables),dimnames = list(rownames(ordered_data),model$mmVariables))
  PLS_predicted_insample_item_residuals <- matrix(0,nrow = nrow(ordered_data),ncol = length(model$mmVariables),dimnames = list(rownames(ordered_data),model$mmVariables))
  #PLS prediction on testset model
  suppressMessages(train_model <- seminr::estimate_pls(data = trainingData,
                                                       measurement_model = model$measurement_model,
                                                       structural_model = model$smMatrix,
                                                       inner_weights = model$inner_weights,
                                                       missing = model$settings$missing,
                                                       missing_value = model$settings$missing_value,
                                                       maxIt = model$settings$maxIt,
                                                       stopCriterion = model$settings$stopCriterion))
  test_predictions <- stats::predict(object = train_model,
                                     testData = testingData,
                                     technique = technique)

  PLS_predicted_outsample_item[testIndexes,] <- test_predictions$predicted_items


  #PLS prediction on trainset model
  train_predictions <- stats::predict(object = train_model,
                                      testData = trainingData,
                                      technique = technique)

  PLS_predicted_insample_item[trainIndexes,] <- train_predictions$predicted_items
  PLS_predicted_insample_item_residuals[trainIndexes,] <- as.matrix(train_predictions$item_residuals)

  ## Perform prediction on LM models for benchmark
  # Identify endogenous items
  endogenous_items <- unlist(sapply(unique(model$smMatrix[,2]), function(x) model$mmMatrix[model$mmMatrix[, "construct"] == x,"measurement"]), use.names = FALSE)

  #LM Matrices
  lm_holder <- sapply(unique(model$smMatrix[,2]), generate_lm_predictions, model = model,
                      ordered_data = ordered_data[,model$mmVariables],
                      testIndexes = testIndexes,
                      endogenous_items = endogenous_items,
                      trainIndexes = trainIndexes)

  lmprediction_in_sample <- matrix(0, ncol = 0 , nrow = length(trainIndexes))
  lmprediction_out_sample <- matrix(0, ncol = 0 , nrow = length(testIndexes))
  lmprediction_in_sample_residuals <- matrix(0,nrow=nrow(ordered_data),ncol=length(endogenous_items),byrow =TRUE,dimnames = list(rownames(ordered_data),endogenous_items))

  # collect the odd and even numbered matrices from the matrices return object
  lmprediction_in_sample <- do.call(cbind, lm_holder[((1:(length(unique(model$smMatrix[,2]))*2))[1:(length(unique(model$smMatrix[,2]))*2)%%2==1])])
  lmprediction_out_sample <- do.call(cbind, lm_holder[((1:(length(unique(model$smMatrix[,2]))*2))[1:(length(unique(model$smMatrix[,2]))*2)%%2==0])])
  lmprediction_in_sample_residuals[trainIndexes,] <- as.matrix(ordered_data[trainIndexes,endogenous_items]) - lmprediction_in_sample[trainIndexes,endogenous_items]

  return(list(PLS_predicted_insample_item = PLS_predicted_insample_item,
              PLS_predicted_outsample_item = PLS_predicted_outsample_item,
              LM_predicted_insample_item = lmprediction_in_sample,
              LM_predicted_outsample_item = lmprediction_out_sample,
              PLS_predicted_insample_item_residuals = PLS_predicted_insample_item_residuals,
              LM_predicted_insample_item_residuals = lmprediction_in_sample_residuals))
}

# Function to collect and parse prediction matrices
prediction_matrices <- function(noFolds, ordered_data, model,technique, cores) {
  out <- tryCatch(
    {
      # If noFolds is NULL, perform parallel LOOCV
      if (is.null(noFolds)) {
        # Automatically perform LOOCV if number of folds not specified
        noFolds = nrow(model$data)
        #Create noFolds equally sized folds
        folds <- cut(seq(1,nrow(ordered_data)),breaks=noFolds,labels=FALSE)

        # Create cluster
        suppressWarnings(ifelse(is.null(cores), cl <- parallel::makeCluster(parallel::detectCores()), cl <- parallel::makeCluster(cores)))

        #generate_lm_predictions <- PLSpredict:::generate_lm_predictions
        #predict_lm_matrices <- PLSpredict:::predict_lm_matrices
        # Export variables and functions to cluster
        parallel::clusterExport(cl=cl, varlist=c("generate_lm_predictions",
                                                 "predict_lm_matrices"), envir=environment())

        # Execute the bootstrap
        utils::capture.output(matrices <- parallel::parSapply(cl,1:noFolds,in_and_out_sample_predictions,folds = folds,
                                                              ordered_data = ordered_data,
                                                              model = model,
                                                              technique = technique))
        # Stop cluster
        parallel::stopCluster(cl)
      } else {
        #Create noFolds equally sized folds
        folds <- cut(seq(1,nrow(ordered_data)),breaks=noFolds,labels=FALSE)
        matrices <- sapply(1:noFolds, in_and_out_sample_predictions, folds = folds,ordered_data = ordered_data, model = model, technique = technique)
      }

      # collect the odd and even numbered matrices from the matrices return object
      in_sample_item_matrix <- do.call(cbind, matrices[(1:(noFolds*6))[1:(noFolds*6)%%6==1]])
      out_sample_item_matrix <- do.call(cbind, matrices[(1:(noFolds*6))[1:(noFolds*6)%%6==2]])
      in_sample_lm_matrix <- do.call(cbind, matrices[(1:(noFolds*6))[1:(noFolds*6)%%6==3]])
      out_sample_lm_matrix <- do.call(cbind, matrices[(1:(noFolds*6))[1:(noFolds*6)%%6==4]])
      PLS_in_sample_item_residuals <- do.call(cbind, matrices[(1:(noFolds*6))[1:(noFolds*6)%%6==5]])
      LM_in_sample_item_residuals <- do.call(cbind, matrices[(1:(noFolds*6))[1:(noFolds*6)%%6==0]])

      # mean the in-sample item predictions by row
      average_insample_item <- sapply(1:length(model$mmVariables), mean_rows, matrix = in_sample_item_matrix,
                                      noFolds = noFolds,
                                      constructs = model$mmVariables)

      # sum the out-sample item predictions by row
      average_outsample_item <- sapply(1:length(model$mmVariables), sum_rows, matrix = out_sample_item_matrix,
                                       noFolds = noFolds,
                                       constructs = model$mmVariables)

      # square the out-sample pls residuals, mean them and take the square root
      average_insample_pls_item_residuals <- sqrt(sapply(1:length(model$mmVariables), mean_rows, matrix = PLS_in_sample_item_residuals^2,
                                                         noFolds = noFolds,
                                                         constructs = model$mmVariables))
      # Collect endogenous items
      endogenous_items <- unlist(sapply(unique(model$smMatrix[,2]), function(x) model$mmMatrix[model$mmMatrix[, "construct"] == x,"measurement"]), use.names = FALSE)

      # mean the in-sample lm predictions by row
      average_insample_lm <- sapply(1:length(endogenous_items), mean_rows, matrix = in_sample_lm_matrix,
                                    noFolds = noFolds,
                                    constructs = endogenous_items)

      # sum the out-sample item predictions by row
      average_outsample_lm <- sapply(1:length(endogenous_items), sum_rows, matrix = out_sample_lm_matrix,
                                     noFolds = noFolds,
                                     constructs = endogenous_items)

      # square the out-sample lm residuals, mean them, and take square root
      average_insample_lm_item_residuals <- sqrt(sapply(1:length(endogenous_items), mean_rows, matrix = LM_in_sample_item_residuals^2,
                                                        noFolds = noFolds,
                                                        constructs = endogenous_items))

      colnames(average_insample_item) <- colnames(average_insample_pls_item_residuals) <- colnames(average_outsample_item) <- model$mmVariables
      colnames(average_insample_lm) <- colnames(average_outsample_lm) <- colnames(average_insample_lm_item_residuals) <- endogenous_items

      return(list(out_of_sample_item = average_outsample_item,
                  in_sample_item = average_insample_item,
                  out_of_sample_lm_item = average_outsample_lm,
                  in_sample_lm_item = average_insample_lm,
                  pls_in_sample_item_residuals = average_insample_pls_item_residuals,
                  lm_in_sample_item_residuals = average_insample_lm_item_residuals))
    },
    error=function(cond) {
      message("Parallel encountered this ERROR: ")
      message(cond)
      parallel::stopCluster(cl)
      return(NULL)
    },
    warning=function(cond) {
      message("Parallel encountered this WARNING:")
      message(cond)
      parallel::stopCluster(cl)
      return(NULL)
    },
    finally={
      #
    }
  )
}

# Function to return the RMSE and MAE of a score
prediction_metrics <- function(residuals) {
  RMSE <- sqrt(mean(residuals^2))
  MAE <- mean(abs(residuals))
  return(matrix(c(RMSE,MAE), nrow = 2, ncol = 1, byrow = TRUE))
}

predict_lm_matrices <- function(x, depTrainData, indepTrainData,indepTestData, endogenous_items) {
  # Train LM
  trainLM <- stats::lm(depTrainData[,x] ~ ., indepTrainData)
  # Predict out of sample
  lmprediction_out_sample <- stats::predict(trainLM, newdata = indepTestData)
  # Predict in sample
  lmprediction_in_sample <- stats::predict(trainLM, newdata = indepTrainData)
  return(list(lm_prediction_in_sample = lmprediction_in_sample,
              lm_prediction_out_sample = lmprediction_out_sample))
}

generate_lm_predictions <- function(x, model, ordered_data, testIndexes, endogenous_items, trainIndexes) {
  # Extract the target and non-target variables for Linear Model
  dependant_items <- model$mmMatrix[model$mmMatrix[,1] == x,2]

  # Create matrix return object holders
  in_sample_matrix <- matrix(0,nrow = nrow(ordered_data), ncol = length(dependant_items), dimnames = list(rownames(ordered_data),dependant_items))
  out_sample_matrix <- matrix(0,nrow = nrow(ordered_data), ncol = length(dependant_items), dimnames = list(rownames(ordered_data),dependant_items))

  # Exclude dependant items from independant matrix
  independant_matrix <- ordered_data[ , -which(names(ordered_data) %in% dependant_items)]
  dependant_matrix <- as.matrix(ordered_data[,dependant_items])

  # Create independant items matrices - training and testing
  indepTestData <- independant_matrix[testIndexes, ]
  indepTrainData <- independant_matrix[-testIndexes, ]

  # Create dependant matrices - training and testing
  if (length(testIndexes) == 1) {
    depTestData <- t(as.matrix(dependant_matrix[testIndexes, ]))
  } else {
    depTestData <- as.matrix(dependant_matrix[testIndexes, ])
  }

  #depTestData <- as.matrix(dependant_matrix[testIndexes, ])
  depTrainData <- as.matrix(dependant_matrix[-testIndexes, ])
  colnames(depTrainData) <- colnames(depTestData) <- dependant_items

  lm_prediction_list <- sapply(dependant_items, predict_lm_matrices, depTrainData = depTrainData,
                               indepTrainData = indepTrainData,
                               indepTestData = indepTestData,
                               endogenous_items = endogenous_items)
  in_sample_matrix[trainIndexes,] <- matrix(unlist(lm_prediction_list[(1:length(lm_prediction_list))[1:length(lm_prediction_list)%%2==1]]), ncol = length(dependant_items), nrow = nrow(depTrainData), dimnames = list(rownames(depTrainData),dependant_items))
  out_sample_matrix[testIndexes,] <- matrix(unlist(lm_prediction_list[(1:length(lm_prediction_list))[1:length(lm_prediction_list)%%2==0]]), ncol = length(dependant_items), nrow = nrow(depTestData), dimnames = list(rownames(depTestData),dependant_items))

  return(list(in_sample_matrix, out_sample_matrix))
}

#' Predictive Scheme
#'
#' \code{predict_EA} and \code{predict_DA} specify the predictive scheme to be used in the generation of the
#' predictions. EA refers to Earliest Antecedents nad DA to Direct Antecedents.
#'
#' @param smMatrix is the \code{structural_model} - a source-to-target matrix representing the inner/structural model,
#'  generated by \code{relationships} generated by SEMinR.
#'
#' @param path_coef is the Path Coefficients matrix from a SEMinR model.
#'
#' @param construct_scores is the matrix of construct scores generated by SEMinR.
#'
#' @usage
#'  predict_EA(smMatrix, path_coef, construct_scores)
#'
#' @export
predict_EA <- function(smMatrix, path_coef, construct_scores) {
  order <- construct_order(smMatrix)
  only_exogenous <- setdiff(unique(smMatrix[,1]), unique(smMatrix[,2]))
  return_matrix <- construct_scores
  return_matrix[,order] <- 0
  for (construct in order) {
    return_matrix[,construct] <- return_matrix %*% path_coef[,construct]

  }
  return(return_matrix)
}

#' Predictive Scheme
#'
#' \code{predict_EA} and \code{predict_DA} specify the predictive scheme to be used in the generation of the
#' predictions. EA refers to Earliest Antecedents nad DA to Direct Antecedents.
#'
#' @param smMatrix is the \code{structural_model} - a source-to-target matrix representing the inner/structural model,
#'  generated by \code{relationships} generated by SEMinR.
#'
#' @param path_coef is the Path Coefficients matrix from a SEMinR model.
#'
#' @param construct_scores is the matrix of construct scores generated by SEMinR.
#'
#' @usage
#'  predict_DA(smMatrix, path_coef, construct_scores)
#'
#' @export
predict_DA <- function(smMatrix, path_coef, construct_scores) {
  only_exogenous <- setdiff(unique(smMatrix[,1]), unique(smMatrix[,2]))
  return_matrix <- construct_scores%*%path_coef
  return_matrix[,only_exogenous] <- construct_scores[,only_exogenous]
  return(return_matrix)
}

return_predict_error <- function(object, indicator) {
  object$prediction_error[,indicator]
}

Try the seminr package in your browser

Any scripts or data that you put into this service are public.

seminr documentation built on Oct. 13, 2022, 1:05 a.m.