# test our ability to use NLP to hit targets
# goal is to build unknown DETAILED age x yos WF and avg pay matrix consistent
# with known SUMMARY age by yos WF and avgpay matrix
# and with known DETAILED WF and avg pay amounts by age (row sums or avgs) and yos (col sums or avgs)
library(plyr)
library(dplyr)
library(lubridate)
library(magrittr)
library(readr)
library(stringr)
library(tidyr)
library(XLConnect) # slow but convenient because it reads ranges
library(nloptr)
library(btools)
ddir <- "./Data/"
fn <- "MakePrototypes(4).xlsx"
sheetname <- "TestProbWFAndPay"
# get TRUE detailed matrices - normally we won't have these
wf.d.true <- as.matrix(readWorksheetFromFile(paste0(ddir, fn), sheet=sheetname, header=FALSE, region="B14:h20"))
avgpay.d.true <- as.matrix(readWorksheetFromFile(paste0(ddir, fn), sheet=sheetname, header=FALSE, region="B35:h41"))
totpay.d.true <- wf.d.true * avgpay.d.true
sum(wf.d.true)
sum(totpay.d.true)
sum(totpay.d.true) / sum(wf.d.true) # grand average pay from details
rnames.d <- 20:26
cnames.d <- 0:6
# read true known SUMMARY wf matrix and avgpay matrix
wf.s <- as.matrix(readWorksheetFromFile(paste0(ddir, fn), sheet=sheetname, header=FALSE, region="B5:D7"))
avgpay.s <- as.matrix(readWorksheetFromFile(paste0(ddir, fn), sheet=sheetname, header=FALSE, region="B26:D28"))
totpay.s <- wf.s * avgpay.s
sum(wf.s)
sum(totpay.s)
sum(totpay.s) / sum(wf.s) # grand average pay from details
nrows.s <- nrow(wf.s)
ncols.s <- ncol(wf.s)
# get known DETAILED wf rowsums by age, colsums by yos
wf.age.d <- as.vector(t(readWorksheetFromFile(paste0(ddir, fn), sheet=sheetname, header=FALSE, region="i14:i20"))) # must transpose
wf.yos.d <- as.vector(readWorksheetFromFile(paste0(ddir, fn), sheet=sheetname, header=FALSE, region="b21:h21"))
sum(wf.s) == sum(wf.age.d)
sum(wf.s) == sum(wf.yos.d)
nrows.d <- length(wf.age.d)
ncols.d <- length(wf.yos.d)
# get known DETAILED avgpay averages by age, averages by yos
avgpay.s.age.d <- as.vector(readWorksheetFromFile(paste0(ddir, fn), sheet=sheetname, header=FALSE, region="i35:i41"))
avgpay.s.yos.d <- as.vector(readWorksheetFromFile(paste0(ddir, fn), sheet=sheetname, header=FALSE, region="b42:h42"))
# create initial guesses
wf.d.guess <- matrix(nrow=nrows.d, ncol=ncols.d)
avgpay.d.guess <- matrix(nrow=nrows.d, ncol=ncols.d)
# for now, use averages in each cell to be collapsed
# we need a crosswalk between the row-col indexes of the summary matrices and of the detailed matrices
# this is it for now, but should be improved upon
recipe.s <- "dimtype, sindex, dindex.first, dindex.last
row, 1, 1, 3
row, 2, 4, 5
row, 3, 6, 7
col, 1, 1, 3
col, 2, 4, 5
col, 3, 6, 7"
recipe <- read_csv(str_replace_all(recipe.s, " ", ""))
# get dimensions
getdim <- function(row, col) {
row.first <- as.numeric(recipe[recipe$dimtype=="row" & recipe$sindex==row, "dindex.first"])
row.last <- as.numeric(recipe[recipe$dimtype=="row" & recipe$sindex==row, "dindex.last"])
col.first <- as.numeric(recipe[recipe$dimtype=="col" & recipe$sindex==col, "dindex.first"])
col.last <- as.numeric(recipe[recipe$dimtype=="col" & recipe$sindex==col, "dindex.last"])
dimlist <- list()
dimlist$rows <- row.first : row.last
dimlist$cols <- col.first : col.last
dimlist$len <- length(dimlist$rows) * length(dimlist$cols)
return(dimlist)
}
getdim(3, 2)
fillguess <- function(dmat, smat, nrows.s, ncols.s, avg=FALSE){
# detailed matrix, summary matrix, n summary rows, n summary columns
for(srow in 1:nrows.s) {
for(scol in 1:ncols.s) {
diminfo <- getdim(srow, scol)
if(avg==TRUE) dmat[diminfo$rows, diminfo$cols] <- smat[srow, scol] / diminfo$len else
dmat[diminfo$rows, diminfo$cols] <- smat[srow, scol]
}
}
return(dmat)
}
wf.d.guess <- fillguess(wf.d.guess, wf.s, nrows.s, ncols.s, avg=TRUE)
wf.d.guess
sum(wf.d.guess)
avgpay.d.guess <- fillguess(avgpay.d.guess, avgpay.s, nrows.s, ncols.s)
avgpay.d.guess
# now we are ready to set up the NLP
# set up an NLP to get an age by yos matrix of actives, by individual years, that makes sense relative to other data we have
# the x's to search for are the individual-year cells in the age x yos matrix
# the constraints that must be satisfied (within a tolerance) are all of the known aggregates, including:
# grouped cell-totals of # actives in the age x yos matrix
# row totals and column totals for that matrix (whicy by definition will be satisfied, so we won't use them)
# totals for individual ages and individual yos's, from elsewhere in the av
# I'll start out with an objective function where we want to minimize the squared difference of calculated total employees vs. known total emps
# but we know we will satisfy this
# slsqp allows equality constraints (convenient) but requires gradients, allows nonlinear constraints
cells <- merge(select(agedf, age), select(yosdf, yos)) %>%
mutate(x=0) # cartesion product - sparse representation of the age x yos matrix
glimpse(cells)
# define constraint vector - each must be less than zero
evalgeq <- function(x, cells) {
# equality constraints
# start by setting the
h <- numeric(6)
# all of these must be <=0
h[1] <- 127 - 2*x[1]^2 - 3*x[2]^4 - x[3] - 4*x[4]^2 - 5*x[5]
h[2] <- 282 - 7*x[1] - 3*x[2] - 10*x[3]^2 - x[4] + x[5]
h[3] <- 196 - 23*x[1] - x[2]^2 - 6*x[6]^2 + 8*x[7]
h[4] <- -4*x[1]^2 - x[2]^2 + 3*x[1]*x[2] -2*x[3]^2 - 5*x[6] +11*x[7]
# x1+x2~ 4.282 -> (a) x1+x2 < 1.1*4.282, (b) x1+x2 > .9*4.282
tol <- .01
val <- 5
h[5] <- x[1] + x[2] - (1+tol) * val
h[6] <- -x[1] - x[2] + (1-tol) * val
return(-h) # note the minus sign
}
# define objective function 267428
fn.hs100 <- function(x) {
(x[1]-10)^2 + 5*(x[2]-12)^2 + x[3]^4 + 3*(x[4]-11)^2 + 10*x[5]^6 +
7*x[6]^2 + x[7]^4 - 4*x[6]*x[7] - 10*x[6] - 8*x[7]
}
hin.hs100 <- function(x) {
h <- numeric(4)
h[1] <- 127 - 2*x[1]^2 - 3*x[2]^4 - x[3] - 4*x[4]^2 - 5*x[5]
h[2] <- 282 - 7*x[1] - 3*x[2] - 10*x[3]^2 - x[4] + x[5]
h[3] <- 196 - 23*x[1] - x[2]^2 - 6*x[6]^2 + 8*x[7]
h[4] <- -4*x[1]^2 - x[2]^2 + 3*x[1]*x[2] -2*x[3]^2 - 5*x[6] +11*x[7]
return(h)
}
x0.hs100 <- c(1, 2, 0, 4, 0, 1, 1)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.