R/ParetoElnet_function.R


# Pareto-Optimization via Normal Boundary Intersection
# Developer: Q. Chelsea Song
# Contact: qianqisong@gmail.com

####################################### NBI Main Function ####################################

#' NBI Main Function
#'
#' Main function for obtaining pareto-optimal solution via normal boundary intersection.
#' @param X0 Initial input vector (selection cutoff followed by predictor weights)
#' @param Spac Number of Pareto spaces (i.e., number of Pareto points minus one)
#' @param Fnum Number of criteria
#' @param VLB Lower boundary for weight vector estimation
#' @param VUB Upper boundary for weight vector estimation
#' @param TolX Tolerance for the weight vector estimate, default is 1e-4
#' @param TolF Tolerance for the criterion estimate, default is 1e-4
#' @param TolCon Tolerance for the constraint conditions, default is 1e-7
#' @param graph If TRUE, plots will be generated for Pareto-optimal curve and predictor weights
#' @param display_solution If TRUE, Pareto-optimal solution will be displayed
#' @import nloptr
#' @return Pareto-Optimal solutions
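#' @note A minimal usage sketch is given below; NBI_Elnet() relies on
#' within-package 'global' variables (R_ParetoR, d_ParetoR, prop_ParetoR,
#' sr_ParetoR, lambda, a) that must be set before it is called.
#' @examples
#' \dontrun{
#' ## A minimal sketch with hypothetical inputs (values are illustrative,
#' ## not package defaults):
#' R_ParetoR    <- matrix(c(1, .3, .2,
#'                          .3, 1, .4,
#'                          .2, .4, 1), 3, 3)  # 2 predictors + criterion
#' d_ParetoR    <- c(1.0, 0.2)  # subgroup mean difference per predictor
#' prop_ParetoR <- 1/4          # proportion of minority applicants
#' sr_ParetoR   <- 0.15         # selection ratio
#' lambda       <- 0.1          # elastic net penalty weight
#' a            <- 0.5          # L1/L2 mixing parameter
#' ## x = (selection cutoff, predictor weights), so length(X0) = 2 + 1
#' out <- NBI_Elnet(X0 = c(0, 0.5, 0.5), Spac = 10, Fnum = 2,
#'                  VLB = c(-2, 0, 0), VUB = c(2, 1, 1))
#' }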

NBI_Elnet = function(X0,Spac,Fnum,VLB=vector(),VUB=vector(),TolX=1e-4,TolF=1e-4,TolCon=1e-7,graph=TRUE,display_solution=TRUE){

cat('\n Estimating Regularized Pareto-Optimal Solution ... \n')

#------------------------------Initialize Options-------------------------------#

  X0 = assert_col_vec(X0)
  VLB = assert_col_vec(VLB)
  VUB = assert_col_vec(VUB)

  # Number of variables
  nvars = length(X0)

  # Set options
  # algorithm: sequential least-squares quadratic programming (SLSQP), an SQP
  # algorithm for nonlinearly constrained, gradient-based optimization that
  # supports both equality and inequality constraints
  # maxeval: maximum number of iterations
  # xtol_rel: relative tolerance for X
  # ftol_rel: relative tolerance for F
  # (TolCon is retained for parity with the MATLAB interface below; nloptr's
  # slsqp() does not expose a separate constraint-tolerance option here)
  # (for reference) MATLAB options:
  # options = optimoptions('fmincon','Algorithm','sqp','MaxIter',(nvars+1)*1000,'TolFun',TolF,'TolX',TolX,'TolCon',TolCon,'Display','off')

  # nl.opts() only builds an options list, so capture the result and pass it
  # to each slsqp() call below via its 'control' argument
  opts = suppressWarnings(
    nloptr::nl.opts(optlist = list(
      maxeval = (nvars+1)*1000
      ,xtol_rel = TolX
      ,ftol_rel = TolF
    ))
  )

  # Initialize PHI

  PHI = matrix(0,Fnum,Fnum)

  #------------------------------Shadow Point-------------------------------#

  # cat('\n ----Step 1: find shadow minimum---- \n')

  ShadowF = matrix(0,Fnum)
  ShadowX = matrix(0,nvars,Fnum)
  xstart  = X0
  out = WeightsFun(Fnum,Spac)
  Weight = out$Weights
  Near = out$Formers
  rm(out)
  Weight = Weight/Spac

  for(i in 1:Fnum){

    # full weight on objective i corresponds to the first/last weight column
    temp = c(1,dim(Weight)[2])
    j = temp[i]
    g_Weight <<- Weight[,j]
    fmin = Inf   # track the best (lowest) objective across random restarts
    ntr = nvars-1
    fminv = matrix(0,ntr,1)
    fminx = matrix(0,nvars,ntr)

    for(k in 1:ntr){

      # random restart: draw a fresh starting point for each try
      xstart = runif(length(X0))

      out = suppressWarnings({
        nloptr::slsqp(x0 = xstart, fn = myLinCom_Elnet
                      ,lower = VLB, upper = VUB
                      ,hin = myCon_ineq
                      ,heq = myCon_eq
                      ,control = opts
        )})

      x = out$par
      f = out$value
      rm(out)

      fminv[k] = f
      fminx[,k] = x

      if(f <= fmin){

        fmin = f
        reps = k

      }

    }

    x = fminx[,reps]

    # normalize the predictor weights (x[1] is the selection cutoff)
    # so that they sum to 1
    x[2:nvars] = x[2:nvars]/sum(x[2:nvars])

    ShadowX[,i] = x
    ShadowX = round(ShadowX,4)

    tempF = -myFM(x)
    ShadowF[i] = round(tempF[i],4)

  }


  #------------------------------Matrix PHI-------------------------------#

  # cat('\n ----Step 2: find PHI---- \n')

  for(i in 1:Fnum){

    PHI[,i] = myFM(ShadowX[,i]) + ShadowF
    PHI[i,i] = 0

  }

  # print(round(PHI,3))

  # Check to make sure that QPP is n-1 dimensional
  if(rcond(PHI) < 1e-8){stop(' Phi matrix singular, aborting.')}

  #------------------------------Quasi-Normal Direction-------------------------------#

  # cat('\n ----Step 3: find Quasi-Normal---- \n')

  g_Normal <<- -PHI%*%matrix(1,Fnum,1)

  #------------------------------weights-------------------------------#

  # cat('\n ----Step 4: create weights---- \n')

  out = WeightsFun(Fnum,Spac)
  Weight = out$Weights
  Near = out$Formers
  rm(out)
  Weight = Weight/Spac
  num_w = dimFun(Weight)[2]

  # cat('\n Weights in row: \n')
  # print(round(Weight,3))

  #------------------------------NBI Subproblems-------------------------------#

  # cat('\n ----Step 5: solve NBI sub-problems---- \n')

  # Starting point for first NBI subproblem is the minimizer of f_1(x)
  xstart = c(ShadowX[,1],0)

  Pareto_Fmat = vector()       # Pareto Optima in F-space
  Pareto_Xmat = vector()       # Pareto Optima in X-space
  X_Near      = vector()

  # solve NBI subproblems
  for(k in 1:num_w){

    w  = Weight[,k]

    # Note: w == 1 flags subproblems that minimize a single objective (the
    # boundary solutions already obtained as shadow points); the index is
    # recorded but not used to skip any subproblem here
    indiv_fn_index = which(w == 1)

    w = rev(w)

    if(Near[k] > 0){

      xstart = X_Near[,Near[k]]
      # starting X is the previous weight-order's X

    }

    # start point in F-space
    g_StartF <<- PHI%*%w + ShadowF

    # SOLVE NBI SUBPROBLEM

    out = suppressWarnings({
      nloptr::slsqp(x0 = xstart, fn = myT_Elnet
                    ,lower = c(VLB,-Inf)
                    ,upper = c(VUB,Inf)
                    ,hin = myCon_ineq
                    ,heq = myTCon_eq
                    ,control = opts)
      })

    x_trial = out$par
    f = out$value
    rm(out)

    Pareto_Fmat = cbind(Pareto_Fmat, -myFM(x_trial[1:nvars]))  # Pareto optima in F-space

    # normalize the predictor weights (x_trial[1] is the selection cutoff)
    # so that they sum to 1
    x_trial[2:nvars] = x_trial[2:nvars]/sum(x_trial[2:nvars])

    Pareto_Xmat = cbind(Pareto_Xmat, x_trial[1:nvars])        # Pareto optima in X-space
    X_Near = cbind(X_Near,x_trial)

  }

  #------------------------------Plot Solutions-------------------------------#

  if(graph==TRUE){plotPareto(Pareto_Fmat, Pareto_Xmat)}

  #------------------------------Output Solutions-------------------------------#

#   Output Solution

  Pareto_Fmat = t(Pareto_Fmat)
  Pareto_Xmat = t(Pareto_Xmat[2:nrow(Pareto_Xmat),])
  colnames(Pareto_Fmat) = c("AI.ratio","Criterion.Validity")
  colnames(Pareto_Xmat) = paste0("P", 1:(nvars-1))

  if(display_solution == TRUE){

    solution = round(cbind(Pareto_Fmat,Pareto_Xmat),3)
    colnames(solution) = c("AI.ratio","Criterion.Validity", paste0("P", 1:(nvars-1)))
    cat("\n Pareto-Optimal Solution \n \n")
    print(solution)

  }else{
    cat("\n Done. \n \n")
  }


  return(list(Pareto_Fmat = round(Pareto_Fmat, 3),
              Pareto_Xmat = round(Pareto_Xmat, 3)))

}

########################### Supporting Functions (A) ########################

# User-Defined Input for NBI.r - Pareto-Optimization via Normal Boundary Intersection

# Input:
## 1) Population correlation matrix (R): criterion & predictor inter-correlation
## 2) Population subgroup difference (d): criterion & predictor mean difference
## between minority and majority subgroups
## 3) Proportion of minority applicants (prop):
## prop = (# of minority applicants)/(total # of applicants)
## 4) Selection ratio (sr): sr = (# of selected applicants)/(total # of applicants)

# Related functions:
# myFM
# myCon_ineq, myCon_eq

###### myFM() ######

#' myFM
#'
#' Supporting function, defines criterion space
#' @param x Input predictor weight vector
#' @return f Criterion vector
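#' @examples
#' \dontrun{
#' ## Sketch with hypothetical inputs (see the NBI_Elnet examples for the
#' ## within-package 'global' variables this function reads):
#' R_ParetoR <- matrix(c(1, .3, .2, .3, 1, .4, .2, .4, 1), 3, 3)
#' d_ParetoR <- c(1.0, 0.2)
#' myFM(c(0, 0.5, 0.5))  # returns c(-AI ratio, -composite validity)
#' }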

myFM = function(x){

  # Obtain within-package 'global' variables
  d <- d_ParetoR
  R <- R_ParetoR

  R_u = R[-nrow(R),-ncol(R)]
  b = x[-1]

  # variance of minority and majority applicant weighted predictor
  # composite (P) distribution (DeCorte, 1999)
  sigma_p = sqrt(t(b)%*%R_u%*%b)

  # mean of minority weighted predictor composite distribution (DeCorte, 1999)
  p_i_bar = 0
  # mean of majority weighted predictor composite distribution (DeCorte, 1999)
  p_a_bar = d%*%x[-1]/sigma_p
  # minority group selection ratio (denoted as h_i in DeCorte et al., 1999)
  SR_B = 1 - pnorm(x[1], p_i_bar)
  # majority group selection ratio (denoted as h_a in DeCorte et al., 1999)
  SR_W = 1 - pnorm(x[1], p_a_bar)

  # AIratio a_g (DeCorte et al., 2007)
  a_g = SR_B/SR_W

  # Composite Validity R_xy
  R_xy = t(c(t(b),0)%*%R%*%c(t(matrix(0,dimFun(R_u)[1],1)),1))/sqrt(t(b)%*%R_u%*%b) # DeCorte et al., 2007

  f = matrix(1,2,1)
  f[1,] = -a_g
  f[2,] = -R_xy

  return(f)

}

####### myCon_ineq() ######

# Nonlinear inequalities at x

#' myCon_ineq
#'
#' Support function, defines the inequality constraint conditions (none are used here)
#' @param x Input predictor weight vector
#' @return Inequality constraint conditions for use in NBI() (an empty vector)

myCon_ineq = function(x){return(vector())}

####### myCon_eq() ######

# Nonlinear equalities at x

#' myCon_eq
#'
#' Support function, defines the equality constraint conditions
#' @param x Input predictor weight vector
#' @return Equality constraint conditions for use in NBI()
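#' @examples
#' \dontrun{
#' ## Sketch with hypothetical inputs: ceq[1] is the residual of the overall
#' ## selection ratio against sr, ceq[2] the residual of the unit-variance
#' ## constraint on the composite; both are 0 at a feasible x
#' R_ParetoR <- matrix(c(1, .3, .2, .3, 1, .4, .2, .4, 1), 3, 3)
#' d_ParetoR <- c(1.0, 0.2); prop_ParetoR <- 1/4; sr_ParetoR <- 0.15
#' myCon_eq(c(0, 0.5, 0.5))
#' }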

myCon_eq = function(x){

  # Obtain within-package 'global' variable
  prop <- prop_ParetoR
  sr <- sr_ParetoR
  d <- d_ParetoR
  R <- R_ParetoR

  R_u = R[-nrow(R),-ncol(R)]
  b = x[-1]

  # variance of minority and majority applicant weighted predictor
  # composite (P) distribution (DeCorte, 1999)
  sigma_p = sqrt(t(b)%*%R_u%*%b)

  # mean of minority weighted predictor composite distribution (DeCorte, 1999)
  p_i_bar = 0
  # mean of majority weighted predictor composite distribution (DeCorte, 1999)
  p_a_bar = d%*%x[-1]/sigma_p
  # minority group selection ratio (denoted as h_i in DeCorte et al., 1999)
  SR_B = 1 - pnorm(x[1], p_i_bar)
  # majority group selection ratio (denoted as h_a in DeCorte et al., 1999)
  SR_W = 1 - pnorm(x[1], p_a_bar)

  # Nonlinear equalities at x

  ceq = matrix(1,2,1)
  ceq[1,] = SR_B*prop + SR_W*(1-prop) - sr # DeCorte et al. (2007)
  ceq[2,] = (t(b)%*%R_u%*%b) - 1

  return(ceq)

}

########################### Supporting Functions (B) ########################

# Supplementary Functions for NBI.r - Pareto-Optimization via Normal Boundary Intersection

# Function List
## assert_col_vec
## dimFun
## WeightsFun
## Weight_Generate
## myLinCom_Elnet
## myT_Elnet
## myTCon_eq
## plotPareto
## calculate_perf_AI
## calculate_perf_rrace

###### assert_col_vec() ######

#' assert_col_vec
#'
#' Support function, coerces row vectors into column form for use in NBI()
#' @param v Vector or matrix to check
#' @return v as a column vector (plain vectors are returned unchanged)
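#' @examples
#' assert_col_vec(1:3)            # plain vectors pass through unchanged
#' assert_col_vec(matrix(1:3, 1)) # a 1 x 3 row matrix becomes 3 x 1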

assert_col_vec = function(v){
  # dimFun() returns c(0,0) for dimensionless vectors, so only
  # wider-than-tall matrices (e.g., 1 x n row vectors) are transposed
  if(dimFun(v)[1] < dimFun(v)[2]){v = t(v)}
  return(v)
}

###### dimFun() ######

#' dimFun
#'
#' Support function, safe wrapper around dim() for use in NBI()
#' @param x Object whose dimensions are needed
#' @return dim(x), or c(0,0) when x has no dim attribute
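#' @examples
#' dimFun(1:3)             # c(0, 0): plain vectors have no dim attribute
#' dimFun(matrix(0, 2, 3)) # c(2, 3)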

dimFun = function(x){
  if(is.null(dim(x))){
    return(c(0,0))
  }else{
    return(dim(x))
  }
}

###### WeightsFun() ######

#' WeightsFun
#'
#' Support function, generates all possible weights for NBI subproblems
#' @param n Number of objectives
#' @param k Number of Pareto spaces (1/k is the uniform spacing between weights)
#' @return A list with Weights (all possible weight vectors, one per column) and Formers (indices of neighboring subproblems used for warm starts)
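#' @examples
#' \dontrun{
#' ## For two objectives with spacing 1/5, the columns of Weights enumerate
#' ## all integer partitions (w1, w2) with w1 + w2 = 5:
#' out <- WeightsFun(n = 2, k = 5)
#' out$Weights / 5  # each column is a weight vector summing to 1
#' }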
#' @export
WeightsFun = function(n, k){

  # global variables
  # weight, Weights, Formers, Layer, lastone, currentone
  #
  # Generates all possible weights for NBI subproblems given:
  # n, the number of objectives
  # 1/k, the uniform spacing between two w_i (k integral)
  # This is essentially all the possible integral partitions
  # of integer k into n parts.

  WeightSub <<- matrix(0,1,n)
  Weights <<- vector()
  Formers <<- vector()
  Layer <<- n
  lastone <<- -1
  currentone <<- -1

  Weight_Generate(1, k)

  return(list(Weights = Weights, Formers = Formers))

}

###### Weight_Generate() ######

#' Weight_Generate
#'
#' Support function, recursively generates the weights for the NBI subproblems (handles > 2 objectives)
#' @param n Index of the objective currently being assigned (recursion level)
#' @param k Remaining integer weight to distribute among the remaining objectives
#' @return Called for its side effects on the package-level weight variables

Weight_Generate = function(n, k){

  # global variables:
  # weight Weights Formers Layer lastone currentone

  # wtgener_test(n,k)
  #
  # Intended to test the weight generation scheme for NBI for > 2 objectives
  # n is the number of objectives
  # 1/k is the uniform spacing between two w_i (k integral)

  if(n == Layer){

    if(currentone >= 0){
      Formers <<- c(Formers,lastone)
      lastone <<- currentone
      currentone <<- -1
    }else{
      num = dimFun(Weights)[2]
      Formers <<- c(Formers,num)
    }

    WeightSub[(Layer - n + 1)] <<- k
    Weights <<- cbind(Weights,t(WeightSub))

  }else{

    for(i in 0:k){
      if(n == (Layer - 2)){
        num = dimFun(Weights)[2]
        currentone <<- num+1
      }

      WeightSub[(Layer - n + 1)] <<- i
      Weight_Generate(n+1, k-i)
    }

  }

}

###### myLinCom_Elnet() ######

#' myLinCom_Elnet
#'
#' Support function, computes the weighted linear combination of the objectives plus the elastic net penalty
#' @param x Input predictor weight vector
#' @return f Penalized weighted-sum objective value

myLinCom_Elnet = function(x){

  # global variables: g_Weight, lambda (penalty weight), a (L1/L2 mixing)

  # elastic net penalty on the normalized predictor weights
  b = x[-1]/sum(x[-1])
  SSE = lambda*((1-a)*sum(b^2) + a*sum(abs(b)))

  F = myFM(x)

  # weighted linear combination of the objectives plus the penalty
  f = t(g_Weight)%*%F + SSE

  return(f)

}

###### myT_Elnet() ######

#' myT_Elnet
#'
#' Support function, defines the NBI subproblem objective for the intermediate step in NBI()
#' @param x_t Temporary input vector c(weight vector, t)
#' @return f NBI subproblem objective (distance parameter t plus elastic net penalty)

myT_Elnet = function(x_t){

  # x_t = c(cutoff, predictor weights, t), where t is the NBI distance
  # parameter along the quasi-normal direction
  # global variables: lambda (penalty weight), a (L1/L2 mixing)

  # elastic net penalty on the normalized predictor weights
  b = x_t[2:(length(x_t)-1)]/sum(x_t[2:(length(x_t)-1)])
  SSE = lambda*((1-a)*sum(b^2) + a*sum(abs(b)))

  f = x_t[length(x_t)] + SSE

  return(f)

}

###### myTCon_eq() ######

#' myTCon_eq
#'
#' Support function, define constraint condition for intermediate step in NBI()
#' @param x_t Temporary input weight vector
#' @return ceq Temporary constraint condition

myTCon_eq = function(x_t){

  # global variables:
  # g_Normal g_StartF

  t = x_t[length(x_t)]
  x = x_t[1:(length(x_t)-1)]

  # keep F(x) on the line through the start point along the quasi-normal
  # direction: -myFM(x) = g_StartF + t*g_Normal
  fe  = -myFM(x) - g_StartF - t * g_Normal

  ceq1 = myCon_eq(x)
  ceq = c(ceq1,fe)

  return(ceq)

}

###### plotPareto() ######

#' plotPareto
#'
#' Function for plotting Pareto-optimal curve and predictor weights
#' @param CriterionOutput Pareto-Optimal criterion solution
#' @param ParetoWeights Pareto-Optimal predictor weight solution
#' @return Plot of Pareto-optimal curve and plot of predictor weights
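#' @examples
#' \dontrun{
#' ## Sketch with made-up solutions: row 1 of CriterionOutput is the AI
#' ## ratio, row 2 the composite validity; row 1 of ParetoWeights is the
#' ## selection cutoff, the remaining rows are predictor weights
#' Fmat <- rbind(seq(0.30, 0.90, length.out = 5),
#'               seq(0.65, 0.45, length.out = 5))
#' Xmat <- rbind(rep(0, 5),
#'               seq(0.2, 0.8, length.out = 5),
#'               seq(0.8, 0.2, length.out = 5))
#' plotPareto(Fmat, Xmat)
#' }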
#' @export
plotPareto = function(CriterionOutput, ParetoWeights){

  par(mfrow=c(1,2))

  AIratio = t(CriterionOutput[1,])
  Criterion = t(CriterionOutput[2,])
  X = t(ParetoWeights[2:nrow(ParetoWeights),])

  # AI ratio - Composite Validity trade-off

  plot(AIratio, Criterion,
       xlim = c(min(AIratio),max(AIratio)),
       main = "Composite Validity -- AI ratio trade-off",
       xlab = "AI ratio",
       ylab = "Composite Validity",
       type='c',col='blue')

  points(AIratio, Criterion,
         pch=8,col='red')

  # Predictor weights

  plot(AIratio,X[,1],
       xlim=c(min(AIratio),max(AIratio)),ylim=c(0,1),
       main = "Predictor weights trade-off function",
       xlab = "AI ratio",
       ylab = "Predictor weight",
       type='c',col='red')
  points(AIratio,X[,1],pch=8,
         col=rainbow(1))

  for(i in 2:ncol(X)){

    lines(AIratio,X[,i],type='c',
          col=rainbow(1, start=((1/ncol(X))*(i-1)), alpha=1))
    points(AIratio,X[,i],pch=8,
           col=rainbow(1, start=((1/ncol(X))*(i-1)), alpha=1))

  }

  legend('topleft',
         legend=c(paste0('Predictor ',1:ncol(X))),
         lty=c(rep(2,ncol(X))),lwd=c(rep(2,ncol(X))),
         col=rainbow(ncol(X)))

}

###### calculate_perf_AI() ######

#' calculate_perf_AI
#'
#' Function for calculating job performance criterion validity and adverse impact ratio (AI ratio) using predictor weights
#' @param data input data (matrix), which consists of predictor scores, performance score (column label: "Perf") and race dummy variable (column label: "Race_dummy")
#' @param weight predictor weights
#' @param prop proportion of minority applicants = (number of minority applicants)/(number of all applicants)
#' @param sr selection ratio = (number of hires) / (number of applicants)
#' @return Perf job performance criterion validity (i.e., correlation between job performance score and predictor weighted composite score)
#' @return AI adverse impact ratio
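#' @examples
#' \dontrun{
#' ## Sketch with simulated applicant data (hypothetical numbers); relies on
#' ## the package helper r2AIratio() used internally:
#' set.seed(1)
#' n <- 500
#' dat <- cbind(X1 = rnorm(n), X2 = rnorm(n),
#'              Perf = rnorm(n), Race_dummy = rbinom(n, 1, 0.25))
#' w <- rbind(c(0.5, 0.5), c(0.3, 0.7))  # one candidate solution per row
#' calculate_perf_AI(dat, w, prop = 0.25, sr = 0.15)
#' }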
calculate_perf_AI <- function(data, weight, prop, sr){

  ## make sure that the weight input is a matrix (one solution per row)
  weight = as.matrix(weight)

  ##give space for Y_hat (nrow(data) x nrow(weight) matrix of weighted composites)
  Y_hat <- matrix(NA, nrow(data), dim(weight)[1])
  ##give space for ryi vector
  Perf_val <- c(rep(NA,dim(weight)[1]))
  ##give space for rrace vector
  rrace_val <- c(rep(NA,dim(weight)[1]))

  for (i in 1:dim(weight)[1]){
    #get weight vector
    weight_vector <- weight[i, (1:dim(weight)[2])]

    #get ryyhat vector
    ##get Y vector
    Y_vector <- data[,"Perf"]
    race_vector <- data[,"Race_dummy"]
    ##get Y_hat[,i] (weighted composite)
    X_matrix <- data[,1:dim(weight)[2]]
    Y_hat[,i] <- t(t(as.matrix(weight_vector))%*%t(as.matrix(X_matrix)))
    ##get ryyhat vector
    Perf_val[i] <- cor(Y_vector,Y_hat[,i])
    rrace_val[i] <- cor(race_vector,Y_hat[,i])
  }

  ## convert the race-composite correlations to AI ratios via the package
  ## helper r2AIratio()
  AI_val = r2AIratio(rrace_val, prop, sr)

  return(out = list(Perf = Perf_val, AI = AI_val))
}


###### calculate_perf_rrace() ######

#' calculate_perf_rrace
#'
#' Function for calculating job performance criterion validity and r_race using predictor weights
#' @param data input data (matrix), which consists of predictor scores, performance score (column label: "Perf") and race dummy variable (column label: "Race_dummy")
#' @param weight predictor weights
#' @param prop proportion of minority applicants = (number of minority applicants)/(number of all applicants)
#' @param sr selection ratio = (number of hires) / (number of applicants)
#' @return Perf job performance criterion validity (i.e., correlation between job performance score and predictor weighted composite score)
#' @return rrace correlation between race dummy variable and predictor composite
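#' @examples
#' \dontrun{
#' ## Sketch with simulated applicant data (hypothetical numbers):
#' set.seed(1)
#' n <- 500
#' dat <- cbind(X1 = rnorm(n), X2 = rnorm(n),
#'              Perf = rnorm(n), Race_dummy = rbinom(n, 1, 0.25))
#' w <- rbind(c(0.5, 0.5), c(0.3, 0.7))  # one candidate solution per row
#' calculate_perf_rrace(dat, w, prop = 0.25, sr = 0.15)
#' }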
calculate_perf_rrace <- function(data, weight, prop, sr){

  ##give space for Y_hat (nrow(data) x nrow(weight) matrix of weighted composites)
  Y_hat <- matrix(NA, nrow(data), dim(weight)[1])
  ##give space for ryi vector
  Perf <- c(rep(NA,dim(weight)[1]))
  ##give space for rrace vector
  rrace <- c(rep(NA,dim(weight)[1]))

  ## make sure that the weight variate is a matrix
  weight = as.matrix(weight)

  for (i in 1:dim(weight)[1]){
    #get weight vector
    weight_vector <- weight[i, (1:dim(weight)[2])]

    #get ryyhat vector
    ##get Y vector
    Y_vector <- data[,"Perf"]
    race_vector <- data[,"Race_dummy"]
    ##get Y_hat[,i] (weighted composite)
    X_matrix <- data[,1:dim(weight)[2]]
    Y_hat[,i] <- t(t(as.matrix(weight_vector))%*%t(as.matrix(X_matrix)))
    ##get ryyhat vector
    Perf[i] <- cor(Y_vector,Y_hat[,i])
    rrace[i] <- cor(race_vector,Y_hat[,i])
  }

  return(out = list(Perf = Perf, rrace = rrace))
}