#' Run lasso estimation on the prepared matrixes
#'
#' @param source_x_mat,source_y_mat,source_w_mat matrixes of independent, dependent variables and weights
#' @param target_x_mat matrixes of independent variables for the prediction sample
#' @param reduced if TRUE terurns reduced outpur without specific regression details.
#' @param n_folds number of folds for cross-validation
#' @param force_lambda allows to specify one lamda value. Shoul be 0, when we
#' want to switch to the linear regression.
#' @inheritParams find_near
#'
#' @return In both \code{reduced=TRUE} and \code{reduced=FALSE} forms, the function returns
#' a list with elements. In the form \code{reduced=TRUE} only results of matching
#' are returned:
#' \itemize{
#' \item \code{source_y_hat} is the vecrtor of predicted values based on the result of
#' the lasso regression with \code{nfolds} cross validation and "mse"
#' measure for identifying minimizing value of lambda. It uses \code{source_x_mat},
#' \code{source_y_mat}, and \code{source_w_mat} for running regression and predictind \code{source_y_hat}.
#' \item \code{target_y_hat} is the vetor of predicted values produced using the
#' \code{source_y_hat} regression results and \code{target_x_mat} independent variables matrix.
#' \item \code{match} is the dataframe with: columns \code{target_id} - index of each
#' \code{target_y_hat} value; column \code{target_y_hat} - its' value; column \code{source_id} -
#' position of the nearest match from the \code{source_y_hat} vector and
#' column \code{source_y_hat} - value of the nearest match
#' }
#'
#' In the not reduced form the list contains more elements:
#' \itemize{
#' \item items \code{source_x_mat}, \code{source_y_mat}, \code{source_w_mat} and \code{target_x_mat} from the inputs to the
#' functions.
#' \item \code{lambda_cv} - result of the \code{\link[glmnet]{cv.glmnet}}.
#' \item \code{fit} - result of the \code{\link[glmnet]{glmnet}}
#' }
#'
#' @export
#'
#' @examples
#' library(dplyr)
#' library(purrr)
#' library(glmnet)
#' library(lassopmm)
#'
#' # Run estimate of a fake data
#' XX <- as.matrix(mtcars[, !names(mtcars) %in% "hp"])
#' YY <- as.matrix(mtcars[, "hp"])
#' WW <- matrix(rep(1, nrow(mtcars)), ncol = 1, nrow = nrow(mtcars))
#' XX1 <- as.matrix(mtcars[1:10, !names(mtcars) %in% "hp"])
#'
#' # Running simple estimation and returning
#' a <- estimate_matches(source_x_mat = XX, source_y_mat = YY,
#' source_w_mat = WW, target_x_mat = XX1,
#' reduced = FALSE, n_near = 5)
#'
#' # Extract regression coefficients
#' a$fit %>% coef()
#' str(a, max.level = 1)
#'
#' # Developing a sample bootsrtap vector
#' perm_example <- purrr::map(1:5, ~ sample(1:nrow(YY), nrow(YY), TRUE))
#'
#' # Running estimations on every single bootstrap permutation vector
#' a_boot <-
#' perm_example %>%
#' purrr::map(~ lassopmm::estimate_matches(
#' source_x_mat = XX[.x, ],
#' source_y_mat = YY[.x, ],
#' source_w_mat = WW[.x, ],
#' target_x_mat = XX1,
#' reduced = FALSE,
#' n_near = 5
#' ))
#'
#' # Exploring the structure
#' a_boot %>%
#' str(max.level = 1)
#'
#' # Accesssing fit of a specific bootstrap iteration
#' a_boot[[3]]$fit
#'
#' # Doing the same with each single bootstrap iteration
#' a_boot %>%
#' map("fit")
#'
#' # Extracting coefficinets from each single bootstrap iteration
#' a_boot %>%
#' map("fit") %>%
#' map(coefficients)
#'
#' # Combine all coefficients in a table
#' aa <-
#' a_boot %>%
#' map("fit") %>%
#' map(broom::tidy) %>%
#' map(~ select(.x, term, estimate))
#' map(seq_along(aa), .f = function(x) {
#' rename_at(aa[[x]], vars(estimate), list(~ paste0(., "_", x)))}) %>%
#' reduce(full_join, by = "term")
#'
estimate_matches <-
function(source_x_mat,
source_y_mat,
source_w_mat,
target_x_mat,
n_near = 5,
n_folds = 10,
force_lambda = NULL,
reduced = TRUE) {
if (is.null(force_lambda) || is.na(force_lambda)) {
lambda_cv <-
glmnet::cv.glmnet(
x = source_x_mat,
y = source_y_mat,
weights = source_w_mat,
type.measure = "mse",
nfolds = n_folds
)
} else {
lambda_cv <- list()
lambda_cv$lambda.min <- force_lambda
}
fit <-
glmnet::glmnet(
x = source_x_mat,
y = source_y_mat,
weights = source_w_mat,
alpha = 1,
lambda = lambda_cv$lambda.min
)
source_y_hat <- stats::predict(fit, newx = source_x_mat)[, 1]
target_y_hat <- stats::predict(fit, newx = target_x_mat)[, 1]
# Check that predict works the right way
# t(as.matrix(coef(fit))) %*% t(cbind(rep(1, nrow(target_x_mat)), target_x_mat))
match <- find_near(target_y_hat, source_y_hat, n_near)
if (!reduced) {
return(
list(
source_x_mat = source_x_mat,
source_w_mat = source_w_mat,
source_y_mat = source_y_mat,
target_x_mat = target_x_mat,
lambda_cv = lambda_cv,
fit = fit,
source_y_hat = source_y_hat,
target_y_hat = target_y_hat,
match = match
)
)
} else {
return(list(
source_y_hat = source_y_hat,
target_y_hat = target_y_hat,
match = match
))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.