R/dist_functions.R
In WeightIt: Weighting for Covariate Balance in Observational Studies

#Functions to compute distance matrices
#Copied from MatchIt with edits

#Transforms covariates so that Euclidean distance computed on transforms covariates is equivalent to
#requested distance. When discarded is not NULL, statistics relevant to transformation are computed
#using retained units, but full covariate matrix is returned.
transform_covariates <- function(formula = NULL, data = NULL, method = "mahalanobis", s.weights = NULL, var = NULL,
                                 discarded = NULL) {
  X <- get.covs.matrix.for.dist(formula, data) |>
    check_X()

  #If all variables have no variance, use Euclidean to avoid errors
  #If some have no variance, removes those to avoid messing up distances
  no_variance <- apply(X, 2L, all_the_same)

  if (sum(no_variance) == ncol(X)) {
    method <- "euclidean"
    X <- X[, 1L, drop = FALSE]
  }
  else {
    X <- X[, !no_variance, drop = FALSE]
  }

  method <- arg::match_arg(method, weightit_distances())

  if (is_null(discarded)) {
    discarded <- rep.int(FALSE, nrow(X))
  }

  if (method == "mahalanobis") {

    arg::when_not_null(var, arg::arg_cov)

    if (is_null(var)) {
      X <- scale(X)
      #NOTE: optmatch and Rubin (1980) use pooled within-group covariance matrix
      var <- {
        if (is_null(s.weights)) cov(X[!discarded, , drop = FALSE])
        else cov.wt(X[!discarded, , drop = FALSE], s.weights[!discarded])[["cov"]]
      }
    }
    else if (all(dim(var) == ncol(X))) {
      arg::err("when {.arg var} is not {.val {list(NULL)}}, it must be a covariance matrix with as many entries as supplied variables")
    }

    inv_var <- generalized_inverse(var)

    X <- mahalanobize(X, inv_var)
  }
  else if (method == "robust_mahalanobis") {
    #Rosenbaum (2010, ch8)
    X_r <- matrix(0, nrow = sum(!discarded), ncol = ncol(X),
                  dimnames = list(rownames(X)[!discarded], colnames(X)))

    for (i in seq_col(X_r)) {
      X_r[, i] <- rank(X[!discarded, i])
    }

    var_r <- {
      if (is_null(s.weights)) cov(X_r)
      else cov.wt(X_r, s.weights[!discarded])$cov
    }

    multiplier <- sd(seq_len(sum(!discarded))) / sqrt(diag(var_r))
    var_r <- var_r * tcrossprod(multiplier)

    inv_var <- generalized_inverse(var_r)

    if (any(discarded)) {
      X_r <- array(0, dim = dim(X), dimnames = dimnames(X))
      for (i in seq_col(X_r)) {
        X_r[!discarded, i] <- rank(X[!discarded, i])
      }
    }

    X <- mahalanobize(X_r, inv_var)
  }
  else if (method == "euclidean") {
    #Do nothing
  }
  else if (method == "scaled_euclidean") {
    arg::when_not_null(var,
                       arg::arg_or(
                         arg::arg_cov,
                         arg::arg_and(
                           arg::arg_numeric,
                           arg::arg_gte(0)
                       )))

    if (is_null(var)) {
      sds <- sqrt(col.w.v(X[!discarded, , drop = FALSE],
                          w = s.weights[!discarded]))
    }
    else if (is.matrix(var) && all(dim(var) == ncol(X))) {
      sds <- sqrt(diag(var))
    }
    else if (is.numeric(var) && length(var) == ncol(X)) {
      sds <- sqrt(var)
    }
    else {
      arg::err("when {.arg var} is not {.val {list(NULL)}}, it must be a covariance matrix or a vector of variances with as many entries as supplied variables")
    }

    for (i in seq_col(X)) {
      X[, i] <- X[, i] / sds[i]
    }
  }

  X
}

#Internal function for fast(ish) Euclidean distance
eucdist_internal <- function(X, treat = NULL) {

  if (is_null(treat)) {
    if (length(dim(X)) <= 1L) {
      d <- abs(outer(drop(X), drop(X), "-"))
      dimnames(d) <- list(names(X), names(X))
    }
    else {
      if (ncol(X) == 1L) {
        d <- abs(outer(drop(X), drop(X), "-"))
      }
      else {
        d <- as.matrix(dist(X))
      }
      dimnames(d) <- list(rownames(X), rownames(X))
    }
  }
  else {
    treat_l <- as.logical(treat)
    if (length(dim(X)) <= 1L) {
      d <- abs(outer(X[treat_l], X[!treat_l], "-"))
      dimnames(d) <- list(names(X)[treat_l], names(X)[!treat_l])
    }
    else {
      if (ncol(X) == 1L) {
        d <- abs(outer(X[treat_l, ], X[!treat_l, ], "-"))
      }
      else {
        d <- as.matrix(dist(X))[treat_l, !treat_l, drop = FALSE]
      }
      dimnames(d) <- list(rownames(X)[treat_l], rownames(X)[!treat_l])
    }
  }

  d
}

#Get covariates (RHS) vars from formula; factor variable contrasts divided by sqrt(2)
#to ensure same result as when non-factor binary variable supplied (see optmatch:::contr.match_on)
get.covs.matrix.for.dist <- function(formula = NULL, data = NULL) {

  if (is_null(formula)) {
    fnames <- colnames(data)
    fnames[!startsWith(fnames, "`")] <- add_quotes(fnames[!startsWith(fnames, "`")], "`")
    data <- as.data.frame(data)
    formula <- terms(reformulate(fnames), data = data)
  }
  else {
    data <- as.data.frame(data)
    formula <- terms(formula, data = data)
  }

  mf <- model.frame(formula, data, na.action = na.pass)

  chars.in.mf <- vapply(mf, is.character, logical(1L))
  mf[chars.in.mf] <- lapply(mf[chars.in.mf], factor)

  formula <- update(formula, NULL ~ . + 1)

  X <- model.matrix(formula, data = mf,
                    contrasts.arg = lapply(Filter(is.factor, mf),
                                           function(x) contrasts(x, contrasts = FALSE) / sqrt(2)))

  if (ncol(X) > 1L) {
    .assign <- .attr(X, "assign")[-1L]
    X <- X[, -1L, drop = FALSE]
  }
  attr(X, "assign") <- .assign

  attr(X, "treat") <-  model.response(mf)

  X
}

check_X <- function(X) {
  if (isTRUE(.attr(X, "checked"))) {
    return(X)
  }

  treat <- .attr(X, "treat")

  if (is.data.frame(X)) {
    X <- as.matrix(X)
  }
  else if (is.numeric(X) && length(dim(X)) <= 1L) {
    X <- matrix(X, nrow = length(X),
                dimnames = list(names(X), NULL))
  }

  if (anyNA(X)) {
    arg::err("missing values are not allowed in the covariates")
  }

  if (!all(is.finite(X))) {
    arg::err("non-finite values are not allowed in the covariates")
  }

  if (!is.numeric(X) || length(dim(X)) != 2L) {
    arg::err("bad X")
  }

  attr(X, "checked") <- TRUE
  attr(X, "treat") <- treat

  X
}

is.cov_like <- function(var, X) {
  is.numeric(var) &&
    length(dim(var)) == 2L &&
    (missing(X) || all(dim(var) == ncol(X))) &&
    isSymmetric(var) &&
    all(diag(var) >= 0)
}

weightit_distances <- function() {
  c("mahalanobis", "robust_mahalanobis", "euclidean", "scaled_euclidean")
}

mahalanobize <- function(X, inv_var) {
  ## Mahalanobize covariates by computing Cholesky decomp,
  ## allowing for NPD cov matrix by pivoting
  tcrossprod(X, .chol2(inv_var))
}

Any scripts or data that you put into this service are public.

WeightIt documentation built on April 22, 2026, 9:07 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

WeightIt
Weighting for Covariate Balance in Observational Studies

R/dist_functions.R
In WeightIt: Weighting for Covariate Balance in Observational Studies

Defines functions mahalanobize weightit_distances is.cov_like check_X get.covs.matrix.for.dist eucdist_internal transform_covariates

Try the WeightIt package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

WeightIt Weighting for Covariate Balance in Observational Studies

R/dist_functions.R In WeightIt: Weighting for Covariate Balance in Observational Studies

Defines functions mahalanobize weightit_distances is.cov_like check_X get.covs.matrix.for.dist eucdist_internal transform_covariates

Try the WeightIt package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

WeightIt
Weighting for Covariate Balance in Observational Studies

R/dist_functions.R
In WeightIt: Weighting for Covariate Balance in Observational Studies