#' @title weighted_anchor_regression
#'
#' @description Estimates weighted Anchor Regression coefficients
#'
#' @param data_x_list list containing coefficient dataframes for different environments
#' @param data_anchor_list list containing anchor dataframes for different environments
#' @param gamma is the regularization parameter for the Anchor Regression
#' @param target_variable is the target variable name contained in the x dataframe
#' @param anchor_model_pre is the pre estimated model for the Anchor Regression. In case of NULL a new model is estimated.
#' @param test_split is desired test/train split for the estimation
#' @param lambda penalization coefficient for Anchor Shrinkage. Initially set to 0.
#'
#' @return A list estimated coefficients with names, weights and the raw coefficient matrix
#' @export
#' @importFrom stats coef lm
#' @examples
#' environments <- 10 # number of observed environments
#'
#' # populate list with generated data of x and anchor
#' data_x_list <- c()
#' data_anchor_list <- c()
#' for(e in 1:environments){
#' x <- as.data.frame(matrix(data = rnorm(100),nrow = 100,ncol = 10))
#' anchor <- as.data.frame(matrix(data = rnorm(200),nrow = 100,ncol = 2))
#' colnames(anchor) <- c('X1','X2')
#' data_x_list[[e]] <- x
#' data_anchor_list[[e]] <- anchor
#' }
#'
#' # estimate model
#' gamma <- 2
#' target_variable <- 'V2'
#' weighted_anchor_regression(data_x_list,
#' data_anchor_list,
#' gamma,
#' target_variable,
#' anchor_model_pre=NULL,
#' test_split=0.4,
#' lambda=0)
weighted_anchor_regression <- function(data_x_list,data_anchor_list,gamma,target_variable,anchor_model_pre=NULL,test_split=0.4, lambda=0){
# initialize coefficient output matrix and patient score list
coefficient_matrix <- NULL
patient_prediction_scores <- c()
# loop thorugh all environments
for(patient in 1:length(data_anchor_list)){
# create test and train split
smp_size <- floor(test_split * nrow(data_x_list[[patient]]))
train_x_ind <- sample(seq_len(nrow(data_x_list[[patient]])), size = smp_size)
train_anchor_ind <- sample(seq_len(nrow(data_anchor_list[[patient]])), size = smp_size)
train_x <- data_x_list[[patient]][train_x_ind, ]
test_x <- data_x_list[[patient]][-train_x_ind, ]
train_anchor <- data_anchor_list[[patient]][train_anchor_ind, ]
test_anchor <- data_anchor_list[[patient]][-train_anchor_ind, ]
# estimate model if desired
if(is.null(anchor_model_pre) ==TRUE){
anchor_model <- anchor_regression(train_x,train_anchor,gamma,target_variable,lambda)
}else{
anchor_model <- anchor_model_pre
}
# save coefficients
coefficient_matrix <- cbind(coefficient_matrix, anchor_model$coeff)
# predict and calculate the mse for the predictions
prediction <- anchor_prediction(anchor_model$model, test_x, test_anchor, gamma, target_variable)
result <- as.data.frame(test_x[target_variable])
result$prediction <- prediction
result$diff2 <- (result[,1] - result$prediction)^2
mse <- mean(result$diff2)
patient_prediction_scores <- c(patient_prediction_scores, mse)
}
# calculate weights and resulting coefficients
weights <- patient_prediction_scores/sum(patient_prediction_scores)
weighted_coefficients <- weights%*%t(coefficient_matrix)
# return result
return_list = list(coeff = weighted_coefficients, names = anchor_model$names, raw_coeffs = coefficient_matrix,weights = weights)
return(return_list)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.