R/calc_ale_ixn.R

Defines functions calc_ale_ixn

# calc_ale_ixn.R
#

# Calculate ALE interaction data
#
# This function is not exported. It is copy-pasted (with some variable name changes)
# from [ALEPlot::ALEPlot()].
# This function is not usually called directly by the user. For details about
# arguments not documented here, see [ale()].
#
#  @author Dan Apley (source of original calculation of ALE in [ALEPlot::ALEPlot()])
#  @references Apley, Daniel W., and Jingyu Zhu.
#  "Visualizing the effects of predictor variables in black box supervised learning models."
#  Journal of the Royal Statistical Society Series B: Statistical Methodology
#  82.4 (2020): 1059-1086.
#  @author Chitu Okoli (rewrote the code based on [ALEPlot::ALEPlot()]` from while retaining ALE calculation)
#
# @param X dataframe. Data for which ALE is to be calculated. The y (outcome)
# column is absent.
# @param model See documentation for [ale()]
# @param x1_col,x2_col character length 1. Name of single columns in X for which
# ALE interaction data is to be calculated. `x1_col` can be of any standard
#  datatype (logical, factor, or numeric) but `x2_col` can only be numeric.
# @param pred_fun See documentation for [ale()]
# @param pred_type See documentation for [ale()]
# @param x_intervals See documentation for [ale()]
#
# @import dplyr
# @import purrr
# @importFrom stats quantile
#
calc_ale_ixn <- function(
    X, model, x1_col, x2_col,
    # n_row, n_col,
    pred_fun, pred_type, x_intervals
) {

  n_row <- nrow(X)
  n_col <- ncol(X)

  # shorten name internally
  xint <- x_intervals
  rm(x_intervals)

  x1_type <- var_type(X[[x1_col]])
  x2_type <- var_type(X[[x2_col]])

  # if (class(X[[x2_col]]) != "numeric" & class(X[[x2_col]]) != "integer") {
  if (x2_type != "numeric") {
    stop("x2 must be numeric or integer. Only x1 can be of a different datatype.")
  }

  # if (class(X[[x1_col]]) %in% c('numeric', 'integer')) {#for numerical/integer X[[x1_col]], calculate the ALE plot
  if (x1_type == 'numeric') {#for numerical/integer X[[x1_col]], calculate the ALE plot

    #find the vectors of z values corresponding to the quantiles of X[[x1_col]] and X[[x2_col]]
    z1 = c(min(X[[x1_col]]), as.numeric(quantile(X[[x1_col]],seq(1/xint,1,length.out=xint), type=1)))  #vector of xint+1 z values for X[[x1_col]]
    z1 = unique(z1)  #necessary if X[,cid(1)] is discrete, in which case z1 could have repeated values
    K1 = length(z1)-1 #reset K1 to the number of unique quantile points
    #group training rows into bins based on z1
    a1 = as.numeric(cut(X[[x1_col]], breaks=z1, include.lowest=TRUE)) #n_row-length index vector indicating into which z1-bin the training rows fall
    z2 = c(min(X[[x2_col]]), as.numeric(quantile(X[[x2_col]],seq(1/xint,1,length.out=xint), type=1)))  #vector of xint+1 z values for X[[x2_col]]
    z2 = unique(z2)  #necessary if X[,cid(2)] is discrete, in which case z2 could have repeated values
    K2 = length(z2)-1 #reset K2 to the number of unique quantile points
    ale_y = matrix(0,K1,K2)  #rows correspond to X[,cid(1)] and columns to X[,cid(2)]
    #group training rows into bins based on z2
    a2 = as.numeric(cut(X[[x2_col]], breaks=z2, include.lowest=TRUE)) #n_row-length index vector indicating into which z2-bin the training rows fall
    X11 = X  #matrix with low X[[x1_col]] and low X[[x2_col]]
    X12 = X  #matrix with low X[[x1_col]] and high X[[x2_col]]
    X21 = X  #matrix with high X[[x1_col]] and low X[[x2_col]]
    X22 = X  #matrix with high X[[x1_col]] and high X[[x2_col]]

    # X11[x_col] = cbind(z1[a1], z2[a2])
    # X12[x_col] = cbind(z1[a1], z2[a2+1])
    # X21[x_col] = cbind(z1[a1+1], z2[a2])
    # X22[x_col] = cbind(z1[a1+1], z2[a2+1])
    X11[c(x1_col, x2_col)] = cbind(z1[a1], z2[a2])
    X12[c(x1_col, x2_col)] = cbind(z1[a1], z2[a2+1])
    X21[c(x1_col, x2_col)] = cbind(z1[a1+1], z2[a2])
    X22[c(x1_col, x2_col)] = cbind(z1[a1+1], z2[a2+1])
    y.hat11 = pred_fun(model, X11, pred_type)
    y.hat12 = pred_fun(model, X12, pred_type)
    y.hat21 = pred_fun(model, X21, pred_type)
    y.hat22 = pred_fun(model, X22, pred_type)
    # y.hat11 = pred_fun(object = model, newdata = X11)
    # y.hat12 = pred_fun(object = model, newdata = X12)
    # y.hat21 = pred_fun(object = model, newdata = X21)
    # y.hat22 = pred_fun(object = model, newdata = X22)

    Delta=(y.hat22-y.hat21)-(y.hat12-y.hat11)  #n_row-length vector of individual local effect values


    ### Maybe bootstrapped section starts here
    Delta = as.matrix(tapply(Delta, list(a1, a2), mean)) #K1xK2 matrix of averaged local effects, which includes NA values if a cell is empty
    #replace NA values in Delta by the Delta value in their nearest neighbor non-NA cell
    NA.Delta = is.na(Delta)  #K1xK2 matrix indicating cells that contain no observations
    NA.ind = which(NA.Delta, arr.ind=T, useNames = F)  #2-column matrix of row and column indices for NA cells
    if (nrow(NA.ind) > 0) {
      notNA.ind = which(!NA.Delta, arr.ind=T, useNames = F)  #2-column matrix of row and column indices for non-NA cells
      range1 = max(z1)-min(z1)
      range2 = max(z2)-min(z2)
      Z.NA = cbind((z1[NA.ind[,1]] + z1[NA.ind[,1]+1])/2/range1, (z2[NA.ind[,2]] + z2[NA.ind[,2]+1])/2/range2) #standardized {z1,z2} values for NA cells corresponding to each row of NA.ind
      Z.notNA = cbind((z1[notNA.ind[,1]] + z1[notNA.ind[,1]+1])/2/range1, (z2[notNA.ind[,2]] + z2[notNA.ind[,2]+1])/2/range2) #standardized {z1,z2} values for non-NA cells corresponding to each row of notNA.ind
      nbrs <- yaImpute::ann(Z.notNA, Z.NA, k=1, verbose = F)$knnIndexDist[,1] #vector of row indices (into Z.notNA) of nearest neighbor non-NA cells for each NA cell
      Delta[NA.ind] = Delta[matrix(notNA.ind[nbrs,], ncol=2)] #Set Delta for NA cells equal to Delta for their closest neighbor non-NA cell. The matrix() command is needed, because if there is only one empty cell, notNA.ind[nbrs] is created as a 2-length vector instead of a 1x2 matrix, which does not index Delta properly
    } #end of if (nrow(NA.ind) > 0) statement
    #accumulate the values in Delta
    ale_y = apply(t(apply(Delta,1,cumsum)),2,cumsum)  #second-order accumulated effects before subtracting lower order effects
    ale_y = rbind(rep(0,K2),ale_y) #add a first row and first column to ale_y that are all zeros
    ale_y = cbind(rep(0,K1+1),ale_y)
    #now subtract the lower-order effects from ale_y
    b=as.matrix(table(a1,a2))  #K1xK2 cell count matrix (rows correspond to X[[x1_col]]; columns to X[[x2_col]])
    b1=apply(b,1,sum)  #K1x1 count vector summed across X[[x2_col]], as function of X[[x1_col]]
    b2=apply(b,2,sum)  #K2x1 count vector summed across X[[x1_col]], as function of X[[x2_col]]
    Delta =ale_y[2:(K1+1),]-ale_y[1:K1,] #K1x(K2+1) matrix of differenced ale_y values, differenced across X[[x1_col]]
    b.Delta = b*(Delta[,1:K2]+Delta[,2:(K2+1)])/2
    Delta.Ave = apply(b.Delta,1,sum)/b1
    fJ1 = c(0,cumsum(Delta.Ave))
    Delta = ale_y[,2:(K2+1)]-ale_y[,1:K2] #(K1+1)xK2 matrix of differenced ale_y values, differenced across X[[x2_col]]
    b.Delta = b*(Delta[1:K1,]+Delta[2:(K1+1),])/2
    Delta.Ave = apply(b.Delta,2,sum)/b2
    fJ2 = c(0, cumsum(Delta.Ave))
    ale_y = ale_y - outer(fJ1,rep(1,K2+1)) - outer(rep(1,K1+1),fJ2)
    fJ0 = sum(b*(ale_y[1:K1,1:K2] + ale_y[1:K1,2:(K2+1)] + ale_y[2:(K1+1),1:K2] + ale_y[2:(K1+1), 2:(K2+1)])/4)/sum(b)
    ale_y = ale_y - fJ0
    x <- list(z1, z2)
    xint <- c(K1, K2)

  }  #end of else if (class(X[[x1_col]]) == "numeric" | class(X[[x1_col]]) == "integer") statement

  # else if (class(X[[x1_col]]) == "factor") {#for categorical X[[x1_col]], calculate the ALE plot
  else if (x1_type %in% c('binary', 'ordinal', 'multinomial')) {

    # If x1_col is a factor, drop any unused levels
    if ('factor' %in% class(X[[x1_col]]) ) {
      X[[x1_col]] <- droplevels(X[[x1_col]])
    }

    #tabulate level counts and probabilities
    x_level_counts <- as.numeric(table(X[[x1_col]])) #frequency count vector for levels of X[[x1_col]]
    # x_level_probs <- x_level_counts/sum(x_level_counts) #probability vector for levels of X[[x1_col]]
    K1 <- nlevels(X[[x1_col]])  #set K1 to the number of levels of X[[x1_col]]
    cum_dist_mx <- matrix(0, K1, K1)  #will be the distance matrix between pairs of levels of X[[x1_col]]
    dist_mx <- matrix(0, K1, K1)  #initialize matrix

    #For loop for calculating distance matrix dist_mx for each of the other predictors
    for (j_col in setdiff(names(X), x1_col)) {
    # for (j in setdiff(1:n_col, cid[1])) {
      # if (class(X[[j_col]]) == "factor") {#Calculate the distance matrix for each categorical predictor
      if (var_type(X[[j_col]]) != 'numeric') {#Calculate the distance matrix for each categorical predictor
        x_j_freq <- table(X[[x1_col]],X[[j_col]])  #frequency table, rows of which will be compared
        x_j_freq <- x_j_freq / as.numeric(x_level_counts)
        for (i in 1:(K1-1)) {
          for (k in (i+1):K1) {
            dist_mx[i,k] = sum(abs(x_j_freq[i,]-x_j_freq[k,]))/2  #This dissimilarity measure is always within [0,1]
            dist_mx[k,i] = dist_mx[i,k]
          }
        }
        cum_dist_mx <- cum_dist_mx + dist_mx
      }  #End of if (class(X[,j[1]] == "factor") statement
      else  { #calculate the distance matrix for each numerical predictor
        j_quantiles <- quantile(X[[j_col]], probs = seq(0, 1, length.out = 100), na.rm = TRUE, names = FALSE)  #quantiles of X[[j_col]] for all levels of X[[x1_col]] combined
        x_by_j_ecdf=tapply(X[[j_col]], X[[x1_col]], stats::ecdf) #list of ecdf's for X[[j_col]] by levels of X[[x1_col]]
        for (i in 1:(K1-1)) {
          for (k in (i+1):K1) {
            dist_mx[i,k] = max(abs(x_by_j_ecdf[[i]](j_quantiles)-x_by_j_ecdf[[k]](j_quantiles)))  #This dissimilarity measure is the Kolmogorov-Smirnov distance between X[[j_col]] for levels i and k of X[[x1_col]]. It is always within [0,1]
            dist_mx[k,i] = dist_mx[i,k]
          }
        }
        cum_dist_mx <- cum_dist_mx + dist_mx
      }  #End of else statement that goes with if (class(X[[j_col]] == "factor") statement
    } #end of for (j in setdiff(1:n_col, cid[1]) loop

    #calculate the 1-D MDS representation of dist_mx and the ordered levels of X[[x1_col]]
    dist_mx_1D <- stats::cmdscale(cum_dist_mx, k = 1) #1-dimensional MDS representation of the distance matrix
    idx_ord_orig_level <- sort(dist_mx_1D, index.return = T)$ix    #K1-length index vector. The i-th element is the original level index of the i-th lowest ordered level of X[[x1_col]].
    ord.ind <- sort(idx_ord_orig_level, index.return = T)$ix    #Another K1-length index vector. The i-th element is the order of the i-th original level of X[[x1_col]].
    levels_original_order <- levels(X[[x1_col]])  #as.character levels of X[[x1_col]] in original order
    levels_ale_order <- levels_original_order[idx_ord_orig_level]  #as.character levels of X[[x1_col]] after ordering
    x_ordered_idx <- ord.ind[as.numeric(X[[x1_col]])]  #n_row-length index vector of numerical version of X[[x1_col]] with numbers corresponding to the indices of the ordered levels

    #Calculate the model predictions with the levels of X[[x1_col]] increased and decreased by one
    z2 = c(min(X[[x2_col]]), as.numeric(quantile(X[[x2_col]],seq(1/xint,1,length.out=xint), type=1)))  #vector of xint+1 z values for X[[x2_col]]
    z2 = unique(z2)  #necessary if X[,cid(2)] is discrete, in which case z2 could have repeated values
    K2 = length(z2)-1 #reset K2 to the number of unique quantile points
    #group training rows into bins based on z2
    a2 = as.numeric(cut(X[[x2_col]], breaks=z2, include.lowest=TRUE)) #n_row-length index vector indicating into which z2-bin the training rows fall
    row_idx_not_hi <- (1:n_row)[x_ordered_idx < K1]  #indices of rows for which X[[x1_col]] was not the highest level
    X11 = X  #matrix with low X[[x1_col]] and low X[[x2_col]]
    X12 = X  #matrix with low X[[x1_col]] and high X[[x2_col]]
    X21 = X  #matrix with high X[[x1_col]] and low X[[x2_col]]
    X22 = X  #matrix with high X[[x1_col]] and high X[[x2_col]]
    X11[row_idx_not_hi,x2_col] = z2[a2][row_idx_not_hi]
    X12[row_idx_not_hi,x2_col] = z2[a2+1][row_idx_not_hi]
    X21[row_idx_not_hi,x1_col] = levels_ale_order[x_ordered_idx[row_idx_not_hi]+1]
    X22[row_idx_not_hi,x1_col] = levels_ale_order[x_ordered_idx[row_idx_not_hi]+1]
    X21[row_idx_not_hi,x2_col] = z2[a2][row_idx_not_hi]
    X22[row_idx_not_hi,x2_col] = z2[a2+1][row_idx_not_hi]
    y.hat11 = pred_fun(model, X11[row_idx_not_hi,], pred_type)
    y.hat12 = pred_fun(model, X12[row_idx_not_hi,], pred_type)
    y.hat21 = pred_fun(model, X21[row_idx_not_hi,], pred_type)
    y.hat22 = pred_fun(model, X22[row_idx_not_hi,], pred_type)
    # y.hat11 = pred_fun(object = model, newdata = X11[row_idx_not_hi,])
    # y.hat12 = pred_fun(object = model, newdata = X12[row_idx_not_hi,])
    # y.hat21 = pred_fun(object = model, newdata = X21[row_idx_not_hi,])
    # y.hat22 = pred_fun(object = model, newdata = X22[row_idx_not_hi,])
    delta_hi=(y.hat22-y.hat21)-(y.hat12-y.hat11)  #n.plus-length vector of individual local effect values
    row_idx_not_lo <- (1:n_row)[x_ordered_idx > 1]  #indices of rows for which X[[x1_col]] was not the lowest level
    X11 = X  #matrix with low X[[x1_col]] and low X[[x2_col]]
    X12 = X  #matrix with low X[[x1_col]] and high X[[x2_col]]
    X21 = X  #matrix with high X[[x1_col]] and low X[[x2_col]]
    X22 = X  #matrix with high X[[x1_col]] and high X[[x2_col]]
    X11[row_idx_not_lo,x1_col] = levels_ale_order[x_ordered_idx[row_idx_not_lo]-1]
    X12[row_idx_not_lo,x1_col] = levels_ale_order[x_ordered_idx[row_idx_not_lo]-1]
    X11[row_idx_not_lo,x2_col] = z2[a2][row_idx_not_lo]
    X12[row_idx_not_lo,x2_col] = z2[a2+1][row_idx_not_lo]
    X21[row_idx_not_lo,x2_col] = z2[a2][row_idx_not_lo]
    X22[row_idx_not_lo,x2_col] = z2[a2+1][row_idx_not_lo]
    y.hat11 = pred_fun(model, X11[row_idx_not_lo,], pred_type)
    y.hat12 = pred_fun(model, X12[row_idx_not_lo,], pred_type)
    y.hat21 = pred_fun(model, X21[row_idx_not_lo,], pred_type)
    y.hat22 = pred_fun(model, X22[row_idx_not_lo,], pred_type)
    # y.hat11 = pred_fun(object = model, newdata = X11[row_idx_not_lo,])
    # y.hat12 = pred_fun(object = model, newdata = X12[row_idx_not_lo,])
    # y.hat21 = pred_fun(object = model, newdata = X21[row_idx_not_lo,])
    # y.hat22 = pred_fun(object = model, newdata = X22[row_idx_not_lo,])
    delta_lo=(y.hat22-y.hat21)-(y.hat12-y.hat11)  #n.neg-length vector of individual local effect values


    ### Maybe bootstrapped section starts here
    ### But maybe earlier with something funny going on before
    Delta = as.matrix(tapply(c(delta_hi, delta_lo), list(c(x_ordered_idx[row_idx_not_hi], x_ordered_idx[row_idx_not_lo]-1), a2[c(row_idx_not_hi, row_idx_not_lo)]), mean)) #(K1-1)xK2 matrix of averaged local effects, which includes NA values if a cell is empty
    #replace NA values in Delta by the Delta value in their nearest neighbor non-NA cell
    NA.Delta = is.na(Delta)  #(K1-1)xK2 matrix indicating cells that contain no observations
    NA.ind = which(NA.Delta, arr.ind=T, useNames = F)  #2-column matrix of row and column indices for NA cells
    if (nrow(NA.ind) > 0) {
      notNA.ind = which(!NA.Delta, arr.ind=T, useNames = F)  #2-column matrix of row and column indices for non-NA cells
      range1 =K1-1
      range2 = max(z2)-min(z2)
      Z.NA = cbind(NA.ind[,1]/range1, (z2[NA.ind[,2]] + z2[NA.ind[,2]+1])/2/range2) # standardized {z1,z2} values for NA cells corresponding to each row of NA.ind, where z1 =1:(K1-1) represents the ordered levels of X[[x_col]]
      Z.notNA = cbind(notNA.ind[,1]/range1, (z2[notNA.ind[,2]] + z2[notNA.ind[,2]+1])/2/range2) #standardized {z1,z2} values for non-NA cells corresponding to each row of notNA.ind
      nbrs <- yaImpute::ann(Z.notNA, Z.NA, k=1, verbose = F)$knnIndexDist[,1] #vector of row indices (into Z.notNA) of nearest neighbor non-NA cells for each NA cell
      Delta[NA.ind] = Delta[matrix(notNA.ind[nbrs,], ncol=2)] #Set Delta for NA cells equal to Delta for their closest neighbor non-NA cell. The matrix() command is needed, because if there is only one empty cell, notNA.ind[nbrs] is created as a 2-length vector instead of a 1x2 matrix, which does not index Delta properly
    } #end of if (nrow(NA.ind) > 0) statement
    #accumulate the values in Delta
    ale_y = matrix(0,K1-1,K2)  #rows correspond to X[,cid(1)] and columns to X[,cid(2)]
    ale_y = apply(t(apply(Delta,1,cumsum)),2,cumsum)  #second-order accumulated effects before subtracting lower order effects
    ale_y = rbind(rep(0,K2),ale_y) #add a first row to ale_y that are all zeros
    ale_y = cbind(rep(0,K1),ale_y) #add a first column to ale_y that are all zeros, so ale_y is now K1x(K2+1)
    #now subtract the lower-order effects from ale_y
    b=as.matrix(table(x_ordered_idx,a2))  #K1xK2 cell count matrix (rows correspond to X[[x1_col]]; columns to X[[x2_col]])
    b2=apply(b,2,sum)  #K2x1 count vector summed across X[[x1_col]], as function of X[[x2_col]]
    Delta = ale_y[,2:(K2+1)]-ale_y[,1:K2] #K1xK2 matrix of differenced ale_y values, differenced across X[[x2_col]]
    b.Delta = b*Delta
    Delta.Ave = apply(b.Delta,2,sum)/b2 #K2x1 vector of averaged local effects
    fJ2 = c(0, cumsum(Delta.Ave))  #(K2+1)x1 vector of accumulated local effects
    b.ave=matrix((b[1:(K1-1),]+b[2:K1,])/2, K1-1, K2)  #(K1-1)xK2 cell count matrix (rows correspond to X[[x1_col]] but averaged across neighboring levels; columns to X[[x2_col]]). Must use "matrix(...)" in case K1=2
    b1=apply(b.ave,1,sum)  #(K1-1)x1 count vector summed across X[[x2_col]], as function of X[[x1_col]]
    Delta =matrix(ale_y[2:K1,]-ale_y[1:(K1-1),], K1-1, K2+1) #(K1-1)x(K2+1) matrix of differenced ale_y values, differenced across X[[x1_col]]
    b.Delta = matrix(b.ave*(Delta[,1:K2]+Delta[,2:(K2+1)])/2, K1-1, K2) #(K1-1)xK2 matrix
    Delta.Ave = apply(b.Delta,1,sum)/b1 #(K1-1)x1 vector of averaged local effects
    fJ1 = c(0,cumsum(Delta.Ave)) #K1x1 vector of accumulated local effects
    ale_y = ale_y - outer(fJ1,rep(1,K2+1)) - outer(rep(1,K1),fJ2)
    fJ0 = sum(b*(ale_y[,1:K2] + ale_y[,2:(K2+1)])/2)/sum(b)
    ale_y = ale_y - fJ0 #K1x(K2+1) matrix
    # x <- list(levels_ale_order, z2)
    x <- list(as.factor(levels_ale_order), z2)
    xint <- c(K1, K2)

  } #end of if (class(X[[x1_col]]) == "factor") statement

  else {
    stop("x1 must be logical, factor, ordered, or numeric")
  }


  ### The following finalization code will probably have to be run for each
  ### bootstrap iteration

  colnames(ale_y)[1] <- "origin"
  rownames(ale_y)[1] <- "origin"

  x1_x_x2_df <-
    ale_y |>
    as_tibble(rownames = "ale_x1", .name_repair = "unique")  |>
    mutate(ale_x1 = x[[1]]) |>
    set_names(c('ale_x1', as.character(x[[2]]))) |>
    tidyr::pivot_longer(
      cols = -c("ale_x1"),
      names_to = "ale_x2",
      values_to = "ale_y"
    ) |>
    mutate(ale_x2 = as.numeric(.data$ale_x2))

  return(x1_x_x2_df)
}

Try the ale package in your browser

Any scripts or data that you put into this service are public.

ale documentation built on May 29, 2024, 10:33 a.m.