R/calculate_covariate_drift.R
In drifter: Concept Drift and Concept Shift Detection for Predictive Models

#' Calculate Covariate Drift for two data frames
#'
#' Here covariate drift is defined as Non-Intersection Distance between two distributions.
#' More formally, $$d(P,Q) = 1 - sum_i min(P_i, Q_i)$$.
#' The larger the distance the more different are two distributions.
#'
#' @param data_old data frame with `old` data
#' @param data_new data frame with `new` data
#' @param bins continuous variables are discretized to `bins` intervals of equal sizes
#'
#' @return an object of a class `covariate_drift` (data.frame) with Non-Intersection Distances
#' @export
#'
#' @examples
#' library("DALEX")
#' # here we do not have any drift
#' d <- calculate_covariate_drift(apartments, apartments_test)
#' d
#' # here we do have drift
#' d <- calculate_covariate_drift(dragons, dragons_test)
#' d
#'
calculate_covariate_drift <- function(data_old, data_new, bins = 20) {
  # variables present in both datasets
  joint_var <- intersect(colnames(data_new), colnames(data_old))

  # distances between variables
  distances <- numeric(length(joint_var))
  names(distances) <- joint_var

  for (i in seq_along(joint_var)) {
    distances[i] <- calculate_distance(data_old[,i], data_new[,i], bins = bins)
  }

  df <- data.frame(variables = names(distances),
                   drift = distances)
  class(df) <- c("covariate_drift", "data.frame")
  df
}

#' Calculate Non-Intersection Distance
#'
#' @param variable_old variable from `old` data
#' @param variable_new variable from `new` data
#' @param bins continuous variables are discretized to `bins` intervals of equal size
#'
#' @return Non-Intersection Distance
#' @export
#' @examples
#' calculate_distance(rnorm(1000), rnorm(1000))
#' calculate_distance(rnorm(1000), runif(1000))
calculate_distance <- function(variable_old, variable_new, bins = 20) {
  if ("factor" %in% class(variable_old)) {
    after_cuts <- c(variable_old, variable_new)
  } else {
    after_cuts <- cut(rank(c(variable_old, variable_new)), bins)
  }

  after_cuts_table <- table(after_cuts, c(rep(1, length(variable_old)), rep(2, length(variable_new))))
  mat <- matrix(c(prop.table(after_cuts_table,2)), nrow = 2, byrow = TRUE)
  1 - sum(apply(mat, 2, min))
}

#' Print Covariate Drift Data Frame
#'
#' @param x an object of the class `covariate_drift`
#' @param max_length length of the first column, by default 25
#' @param ... other arguments, currently ignored
#'
#' @return this function prints a data frame with a nicer format
#' @export
#'
#' @examples
#' library("DALEX")
#' # here we do not have any drift
#' d <- calculate_covariate_drift(apartments, apartments_test)
#' d
#' # here we do have drift
#' d <- calculate_covariate_drift(dragons, dragons_test)
#' d
#'
print.covariate_drift <- function(x, max_length = 25, ...) {
  ntmp <- as.character(x$variables)
  nums <- sprintf("%3.1f", round(100*x$drift,1))
  nums2 <- paste0(substr(rep("     ", length(nums)), 1, 6 - nchar(nums)), nums)
  nams <- sapply(ntmp, function(j) paste0(substr("                    ", 1,
                                                 pmax(max_length - nchar(j), 0)),
                                          substr(j, 1, max_length),
                                          " "))
  stars <- paste0(ifelse((x$drift > 0.1) & (x$drift < 0.2), ".", ""),
                  ifelse(x$drift > 0.2, "*", ""),
                  ifelse(x$drift > 0.3, "*", ""),
                  ifelse(x$drift > 0.4, "*", ""))

  cat("                  Variable  Shift\n")
  cat("  -------------------------------------\n")
  cat("",paste0(nams, nums2, "  ", stars,"\n"))
  return(invisible(x))
}

Any scripts or data that you put into this service are public.

drifter documentation built on May 31, 2019, 5:04 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

drifter
Concept Drift and Concept Shift Detection for Predictive Models

R/calculate_covariate_drift.R
In drifter: Concept Drift and Concept Shift Detection for Predictive Models

Try the drifter package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

drifter Concept Drift and Concept Shift Detection for Predictive Models

R/calculate_covariate_drift.R In drifter: Concept Drift and Concept Shift Detection for Predictive Models

Try the drifter package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

drifter
Concept Drift and Concept Shift Detection for Predictive Models

R/calculate_covariate_drift.R
In drifter: Concept Drift and Concept Shift Detection for Predictive Models