# Pareto-Optimization via Normal Boundary Intersection
# Developer: Q. Chelsea Song
# Contact: qianqisong@gmail.com
####################################### NBI Main Function ####################################
#' NBI Main Function
#'
#' Main function for obtaining pareto-optimal solution via normal boundary intersection.
#' @param X0 Initial input for predictor weight vector
#' @param Spac Number of Pareto spaces (i.e., number of Pareto points minus one)
#' @param Fnum Number of criteria
#' @param VLB Lower boundary for weight vector estimation
#' @param VUB Upper boundary for weight vector estimation
#' @param TolX Tolerance index for estimating weight vector, default is 1e-4
#' @param TolF Tolerance index for estimating criterion, default is 1e-4
#' @param TolCon Tolerance index for constraint conditions, default is 1e-7
#' @param graph If TRUE, plots will be generated for Pareto-optimal curve and predictor weights
#' @param display_solution If TRUE, Pareto-optimal solution will be displayed
#' @import nloptr
#' @return Pareto-Optimal solutions
NBI_Elnet = function(X0,Spac,Fnum,VLB=vector(),VUB=vector(),TolX=1e-4,TolF=1e-4,TolCon=1e-7,graph=TRUE,display_solution=TRUE){
cat('\n Estimating Regularized Pareto-Optimal Solution ... \n')
#------------------------------Initialize Options-------------------------------#
X0 = assert_col_vec(X0)
VLB = assert_col_vec(VLB)
VUB = assert_col_vec(VUB)
# Number of variables
nvars = length(X0)
# Set options
# algorithm: sequential (least-squares) quadratic programming algorithm
# (SQP is algorithm for nonlinearly constrained, gradient-based optimization,
# supporting both equality and inequality constraints.)
# maxeval: Max Iterations
# xtol_abs: Tolerance for X
# ftol_abs: Tolerance for F
# tol_constraints_ineq/eq: Tolerance for inequal/equal constraints
# (for reference) MATLAB constraints:
# options = optimoptions('fmincon','Algorithm','sqp','MaxIter',(nvars+1)*1000,'TolFun',TolF,'TolX',TolX,'TolCon',TolCon,'Display','off')
suppressWarnings({
nloptr::nl.opts(optlist = list(
maxeval = (nvars+1)*1000
,xtol_rel = TolX
,ftol_rel = TolF
))
})
# Initialize PHI
PHI = matrix(0,Fnum,Fnum)
#------------------------------Shadow Point-------------------------------#
# cat('\n ----Step 1: find shadow minimum---- \n')
ShadowF = matrix(0,Fnum)
ShadowX = matrix(0,nvars,Fnum)
xstart = X0
out = WeightsFun(Fnum,Spac)
Weight = out$Weights
Near = out$Formers
rm(out)
Weight = Weight/Spac
for(i in 1:Fnum){
temp = c(1,dim(Weight)[2])
j = temp[i]
g_Weight <<- Weight[,j]
fmin = 9999
ntr = nvars-1
fminv = matrix(0,ntr,1)
fminx = matrix(0,nvars,ntr)
for(k in 1:ntr){
xstart = runif(length(X0))
out = suppressWarnings({
nloptr::slsqp(x0 = X0, fn = myLinCom_Elnet
,lower = VLB, upper = VUB
,hin = myCon_ineq
,heq = myCon_eq
)})
x = out$par
f = out$value
rm(out)
fminv[k] = f
fminx[,k] = x
if(f <= fmin){
fmin = f
reps = k
}
}
x = fminx[,reps]
som = 0
for(k in 2:nvars){
som = som + x[k]
}
for(k in 2:nvars){
x[k] = x[k]/som
}
# to make sum of x = 1
ShadowX[,i] = x
ShadowX = round(ShadowX,4)
tempF = -myFM(x)
ShadowF[i] = round(tempF[i],4)
}
#------------------------------Matrix PHI-------------------------------#
# cat('\n ----Step 2: find PHI---- \n')
for(i in 1:Fnum){
PHI[,i] = myFM(ShadowX[,i]) + ShadowF
PHI[i,i] = 0
}
# print(round(PHI,3))
# Check to make sure that QPP is n-1 dimensional
if(rcond(PHI) < 1e-8){stop(' Phi matrix singular, aborting.')}
#------------------------------Quasi-Normal Direction-------------------------------#
# cat('\n ----Step 3: find Quasi-Normal---- \n')
g_Normal <<- -PHI%*%matrix(1,Fnum,1)
#------------------------------weights-------------------------------#
# cat('\n ----Step 4: create weights---- \n')
out = WeightsFun(Fnum,Spac)
Weight = out$Weight
Near = out$Formers
Weight = Weight/Spac
num_w = dimFun(Weight)[2]
# cat('\n Weights in row: \n')
# print(round(Weight,3))
#------------------------------NBI Subproblems-------------------------------#
# cat('\n ----Step 5: solve NBI sub-problems---- \n')
# Starting point for first NBI subproblem is the minimizer of f_1(x)
xstart = c(ShadowX[,1],0)
Pareto_Fmat = vector() # Pareto Optima in F-space
Pareto_Xmat = vector() # Pareto Optima in X-space
X_Near = vector()
# solve NBI subproblems
for(k in 1:num_w){
w = Weight[,k]
# Solve problem only if it is not minimizing one of the individual objectives
indiv_fn_index = which(w == 1)
# the boundary solution which has been solved
w = rev(w)
if(Near[k] > 0){
xstart = X_Near[,Near[k]]
#start X is the previous weight-order's X
}
#start point in F-space
g_StartF <<- PHI%*%w + ShadowF
# SOLVE NBI SUBPROBLEM
out = suppressWarnings({
nloptr::slsqp(x0 = xstart, fn = myT_Elnet
,lower = c(VLB,-Inf)
,upper = c(VUB,Inf)
,hin = myCon_ineq
,heq = myTCon_eq)
})
x_trial = out$par
f = out$value
rm(out)
Pareto_Fmat = cbind(Pareto_Fmat, -myFM(x_trial[1:nvars])) # Pareto optima in F-space
som = 0
for(k in 2:nvars){som = som + x_trial[k]}
for(k in 2:nvars){x_trial[k] = x_trial[k]/som}
Pareto_Xmat = cbind(Pareto_Xmat, x_trial[1:nvars]) # Pareto optima in X-space
X_Near = cbind(X_Near,x_trial)
}
#------------------------------Plot Solutions-------------------------------#
if(graph==TRUE){plotPareto(Pareto_Fmat, Pareto_Xmat)}
#------------------------------Output Solutions-------------------------------#
# Output Solution
Pareto_Fmat = t(Pareto_Fmat)
Pareto_Xmat = t(Pareto_Xmat[2:nrow(Pareto_Xmat),])
colnames(Pareto_Fmat) = c("AI.ratio","Criterion.Validity")
colnames(Pareto_Xmat) = c(paste0(rep("P",(nvars-1)),1:(nvars-1)))
if(display_solution == TRUE){
solution = round(cbind(Pareto_Fmat,Pareto_Xmat),3)
colnames(solution) = c("AI.ratio","Criterion.Validity", paste0(rep("P",(nvars-1)),1:(nvars-1)))
cat("\n Pareto-Optimal Solution \n \n")
print(solution)
}else{
cat("\n Done. \n \n")
}
return(list(Pareto_Fmat = round(Pareto_Fmat, 3),
Pareto_Xmat = round(Pareto_Xmat, 3)))
}
########################### Supporting Functions (A) ########################
# User-Defined Input for NBI.r - Pareto-Optimization via Normal Boundary Intersection
# Input:
## 1) Population correlation matrix (R): criterion & predictor inter-correlation
## 2) Population subgroup difference (d): criterion & predictor mean difference
## between minority and majority subgroups
## 3) Proportion of minority applicants (prop):
## prop = (# of minority applicants)/(total # of applicants)
## 4) Selection ratio (sr): sr = (# of selected applicants)/(total # of applicants)
# Related functions:
# myFM
# myCon
###### myFM() ######
#' myFM
#'
#' Supporting function, defines criterion space
#' @param x Input predictor weight vector
#' @return f Criterion vector
myFM = function(x){
# Obtain within-package 'global' variables
d <- d_ParetoR
R <- R_ParetoR
R_u = R[-nrow(R),-ncol(R)]
b = x[-1]
# variance of minority and majority applicant weighted predictor
# composite (P) distribution (DeCorte, 1999)
sigma_p = sqrt(t(b)%*%R_u%*%b)
# mean of minority weighted predictor composite distribution (DeCorte, 1999)
p_i_bar = 0
# mean of majority weighted predictor composite distribution (DeCorte, 1999)
p_a_bar = d%*%x[-1]/sigma_p
# minority group selection ratio (denoted as h_i in DeCorte et al., 1999)
SR_B = 1 - pnorm(x[1], p_i_bar)
# majority group selection ratio (denoted as h_i in DeCorte et al., 1999)
SR_W = 1 - pnorm(x[1], p_a_bar)
# AIratio a_g (DeCorte et al., 2007)
a_g = SR_B/SR_W
# Composite Validity R_xy
R_xy = t(c(t(b),0)%*%R%*%c(t(matrix(0,dimFun(R_u)[1],1)),1))/sqrt(t(b)%*%R_u%*%b) # DeCorte et al., 2007
f = matrix(1,2,1)
f[1,] = -a_g
f[2,] = -R_xy
return(f)
}
####### myCon_ineq() ######
# Nonlinear inequalities at x
#' myCon_ineq
#'
#' Support function, defines inequal constraint condition
#' @param x Input predictor weight vector
#' @return Inequal constraint condition for use in NBI()
myCon_ineq = function(x){return(vector())}
####### myCon_eq() ######
# Nonlinear equalities at x
#' myCon_eq
#'
#' Support function, defines equal constraint condition
#' @param x Input predictor weight vector
#' @return Equal constraint condition for use in NBI()
myCon_eq = function(x){
# Obtain within-package 'global' variable
prop <- prop_ParetoR
sr <- sr_ParetoR
d <- d_ParetoR
R <- R_ParetoR
R_u = R[-nrow(R),-ncol(R)]
b = x[-1]
# variance of minority and majority applicant weighted predictor
# composite (P) distribution (DeCorte, 1999)
sigma_p = sqrt(t(b)%*%R_u%*%b)
# mean of minority weighted predictor composite distribution (DeCorte, 1999)
p_i_bar = 0
# mean of majority weighted predictor composite distribution (DeCorte, 1999)
p_a_bar = d%*%x[-1]/sigma_p
# p_a_bar = (x[2]*1.00+x[3]*0.23+x[4]*0.09+x[5]*0.33)/sigma_p
# minority group selection ratio (denoted as h_i in DeCorte et al., 1999)
SR_B = 1 - pnorm(x[1], p_i_bar)
# majority group selection ratio (denoted as h_i in DeCorte et al., 1999)
SR_W = 1 - pnorm(x[1], p_a_bar)
# Nonlinear equalities at x
ceq = matrix(1,2,1)
ceq[1,] = SR_B*prop + SR_W*(1-prop) - sr # DeCorte et al. (2007)
ceq[2,] = (t(b)%*%R_u%*%b) - 1
return(ceq)
}
########################### Supporting Functions (B) ########################
# Supplementary Functions for NBI.r - Pareto-Optimization via Normal Boundary Intersection
# Function List
## assert_col_vec
## dimFun
## WeightsFun
## Weight_Generate
## myLinCom_Elnet
## myT_Elnet
## myTCon_eq
## plotPareto
###### assert_col_vec() ######
#' assert_col_vec
#'
#' Support function, refines intermediate variable for use in NBI()
#' @param v Intermediate variable v
#' @return Refined variable v
assert_col_vec = function(v){
if(is.null(dimFun(v))){
v=v
}else if(dimFun(v)[1] < dimFun(v)[2]){v = t(t)}
return(v)}
###### dimFun() ######
#' dimFun
#'
#' Support function, checks input predictor weight vector x
#' @param x Input predictor weight vector
#' @return x Checked and refined input predictor weight vector
dimFun = function(x){
if(is.null(dim(x))){
return(c(0,0))
}else(return(dim(x)))
}
###### WeightsFun() ######
#' WeightsFun
#'
#' Support function, generates all possible weights for NBI subproblems
#' @param n Number of objects (i.e., number of predictor and criterion)
#' @param k Number of Pareto points
#' @return Weights All possible weights for NBI subproblem
#' @export
WeightsFun = function(n, k){
# global variables
# weight, Weights, Formers, Layer, lastone, currentone
#
# Generates all possible weights for NBI subproblems given:
# n, the number of objectives
# 1/k, the uniform spacing between two w_i (k integral)
# This is essentially all the possible integral partitions
# of integer k into n parts.
WeightSub <<- matrix(0,1,n)
Weights <<- vector()
Formers <<- vector()
Layer <<- n
lastone <<- -1
currentone <<- -1
Weight_Generate(1, k)
return(list(Weights = Weights, Formers = Formers))
}
###### Weight_Generate() ######
#' Weight_Generate
#'
#' Function intended to test the weight generation scheme for NBI for > 2 objectives
#' @param n Number of objects (i.e., number of predictor and criterion)
#' @param k Number of Pareto points
#' @return Weight_Generate
Weight_Generate = function(n, k){
# global variables:
# weight Weights Formers Layer lastone currentone
# wtgener_test(n,k)
#
# Intended to test the weight generation scheme for NBI for > 2 objectives
# n is the number of objectives
# 1/k is the uniform spacing between two w_i (k integral)
if(n == Layer){
if(currentone >= 0){
Formers <<- c(Formers,lastone)
lastone <<- currentone
currentone <<- -1
}else{
num = dimFun(Weights)[2]
Formers <<- c(Formers,num)
}
WeightSub[(Layer - n + 1)] <<- k
Weights <<- cbind(Weights,t(WeightSub))
}else{
for(i in 0:k){
if(n == (Layer - 2)){
num = dimFun(Weights)[2]
currentone <<- num+1
}
WeightSub[(Layer - n + 1)] <<- i
Weight_Generate(n+1, k-i)
}
}
}
###### myLinCom_Elnet() ######
#' myLinCom_Elnet
#'
#' Support function
#' @param x Input predictor weight vector
#' @return f Criterion vector
myLinCom_Elnet = function(x){
# Elastic net term SSE
b = x[-1]/sum(x[-1])
SSE = lambda*((1-a)*sum(b^2) + a*sum(abs(b)))
# global variable: g_Weight
# F = myFM(x)
F = myFM(x)
f = t(g_Weight)%*%F + SSE
# f = t(g_Weight)%*%F + SSE
return(f)
}
###### myT_Elnet() ######
#' myT_Elnet
#'
#' Support function, define criterion space for intermediate step in NBI()
#' @param x_t Temporary input weight vector
#' @return f Temporary criterion space
myT_Elnet = function(x_t){
# Elastic net term SSE
# b = x_t[-1]/sum(x_t[-1])
b = x_t[2:(length(x_t)-1)]/sum(x_t[2:(length(x_t)-1)])
SSE = lambda*((1-a)*sum(b^2) + a*sum(abs(b)))
f = x_t[length(x_t)] + SSE
return(f)
}
###### myTCon_eq() ######
#' myTCon_eq
#'
#' Support function, define constraint condition for intermediate step in NBI()
#' @param x_t Temporary input weight vector
#' @return ceq Temporary constraint condition
myTCon_eq = function(x_t){
# global variables:
# g_Normal g_StartF
t = x_t[length(x_t)]
x = x_t[1:(length(x_t)-1)]
fe = -myFM(x) - g_StartF - t * g_Normal
# c = myCon_ineq(x)
ceq1 = myCon_eq(x)
ceq = c(ceq1,fe)
return(ceq)
}
###### plotPareto() ######
#' plotPareto
#'
#' Function for plotting Pareto-optimal curve and predictor weights
#' @param CriterionOutput Pareto-Optimal criterion solution
#' @param ParetoWeights Pareto-Optimal predictor weight solution
#' @return Plot of Pareto-optimal curve and plot of predictor weights
#' @export
plotPareto = function(CriterionOutput, ParetoWeights){
par(mfrow=c(1,2))
AIratio = t(CriterionOutput[1,])
Criterion = t(CriterionOutput[2,])
X = t(ParetoWeights[2:nrow(ParetoWeights),])
# AI ratio - Composite Validity trade-off
plot(AIratio, Criterion,
xlim = c(min(AIratio),max(AIratio)),
main = "Composite Validity -- AI ratio trade-off",
xlab = "AI ratio",
ylab = "Composite Validity",
type='c',col='blue')
points(AIratio, Criterion,
pch=8,col='red')
# Predictor weights
plot(AIratio,X[,1],
xlim=c(min(AIratio),max(AIratio)),ylim=c(0,1),
main = "Predictor weights trade-off function",
xlab = "AI ratio",
ylab = "Predictor weight",
type='c',col='red')
points(AIratio,X[,1],pch=8,
col=rainbow(1))
for(i in 2:ncol(X)){
lines(AIratio,X[,i],type='c',
col=rainbow(1, start=((1/ncol(X))*(i-1)), alpha=1))
points(AIratio,X[,i],pch=8,
col=rainbow(1, start=((1/ncol(X))*(i-1)), alpha=1))
}
legend('topleft',
legend=c(paste0('Predictor ',1:ncol(X))),
lty=c(rep(2,ncol(X))),lwd=c(rep(2,ncol(X))),
col=rainbow(ncol(X)))
}
###### calculate_perf_AI() ######
#' calculate_perf_AI
#'
#' Function for calculating job performance criterion validity and adverse impact ratio (AI ratio) using predictor weights
#' @param data input data (matrix), which consists of predictor scores, performance score (column label: "Perf") and race dummy variable (column label: "Race_dummy")
#' @param weight predictor weights
#' @param prop proportion of minority applicants = (number of minority applicants)/(number of all applicants)
#' @param sr selection ratio = (number of hires) / (number of applicants)
#' @return Perf job performance criterion validity (i.e., correlation between job performance score and predictor weighted composite score)
#' @return AI adverse impact ratio
calculate_perf_AI <- function(data, weight, prop, sr){
##give space for Y_hat (n_val*(numpoints+1) matrix of weighted composites)
Y_hat <- matrix(0, nrow(data_val), dim(weight)[1])
##give space for ryi vector
Perf_val <- c(rep(0,dim(weight)[1]))
##give space for rrace vector
rrace_val <- c(rep(0,dim(weight)[1]))
for (i in 1:dim(weight)[1]){
#get weight vector
weight_vector <- weight[i, (1:dim(weight)[2])]
#get ryyhat vector
##get Y vector
Y_vector <- data_val[,"Perf"]
race_vector <- data_val[,"Race_dummy"]
##get Y_hat[,i} (weighted composite)
X_matrix <- data_val[,1:dim(weight)[2]]
Y_hat[,i] <- t(t(as.matrix(weight_vector))%*%t(as.matrix(X_matrix)))
##get ryyhat vector
Perf_val[i] <- cor(Y_vector,Y_hat[,i])
rrace_val[i] <- cor(race_vector,Y_hat[,i])
}
AI_val = r2AIratio(rrace_val, prop, sr)
return(out = list(Perf = Perf_val, AI = AI_val))
}
###### calculate_perf_rrace() ######
#' calculate_perf_rrace
#'
#' Function for calculating job performance criterion validity and r_race using predictor weights
#' @param data input data (matrix), which consists of predictor scores, performance score (column label: "Perf") and race dummy variable (column label: "Race_dummy")
#' @param weight predictor weights
#' @param prop proportion of minority applicants = (number of minority applicants)/(number of all applicants)
#' @param sr selection ratio = (number of hires) / (number of applicants)
#' @return Perf job performance criterion validity (i.e., correlation between job performance score and predictor weighted composite score)
#' @return rrace correlation between race dummy variable and predictor composite
calculate_perf_rrace <- function(data, weight, prop, sr){
##give space for Y_hat (n_val*(numpoints+1) matrix of weighted composites)
Y_hat <- matrix(NA, nrow(data), dim(weight)[1])
##give space for ryi vector
Perf <- c(rep(NA,dim(weight)[1]))
##give space for rrace vector
rrace <- c(rep(NA,dim(weight)[1]))
## make sure that the weight variate is a matrix
weight = as.matrix(weight)
for (i in 1:dim(weight)[1]){
#get weight vector
weight_vector <- weight[i, (1:dim(weight)[2])]
#get ryyhat vector
##get Y vector
Y_vector <- data[,"Perf"]
race_vector <- data[,"Race_dummy"]
##get Y_hat[,i} (weighted composite)
X_matrix <- data[,1:dim(weight)[2]]
Y_hat[,i] <- t(t(as.matrix(weight_vector))%*%t(as.matrix(X_matrix)))
##get ryyhat vector
Perf[i] <- cor(Y_vector,Y_hat[,i])
rrace[i] <- cor(race_vector,Y_hat[,i])
}
return(out = list(Perf = Perf, rrace = rrace))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.