# ----------------------------------------------------------------------------
# R-code (www.r-project.org/) for the Structural Matching Model
#
# Copyright (c) 2016 Thilo Klein
#
# This library is distributed under the terms of the GNU Public License (GPL)
# for full details see the file LICENSE
#
# ----------------------------------------------------------------------------
#' @title Matching model and selection correction for college admissions
#'
#' @description The function provides a Gibbs sampler for a structural matching model that
#' estimates preferences and corrects for sample selection bias when the selection process
#' is a two-sided matching game; i.e., a matching of students to colleges.
#'
#' The structural model consists of a selection and an outcome equation. The \emph{Selection Equation}
#' determines which matches are observed (\eqn{D=1}) and which are not (\eqn{D=0}).
#' \deqn{ \begin{array}{lcl}
#' D &= & 1[V \in \Gamma] \\
#' V &= & W\beta + \eta
#' \end{array}
#' }{ D = 1[V in \Gamma] with V = W\beta + \eta
#' }
#' Here, \eqn{V} is a vector of latent valuations of \emph{all feasible} matches, i.e. observed and
#' unobserved, and \eqn{1[.]} is the Iverson bracket.
#' A match is observed if its match valuation is in the set of valuations \eqn{\Gamma}
#' that satisfy the equilibrium condition (see Sorensen, 2007).
#' The match valuation \eqn{V} is a linear function of \eqn{W}, a matrix of characteristics for
#' \emph{all feasible} matches, and \eqn{\eta}, a vector of random errors. \eqn{\beta} is a parameter
#' vector to be estimated.
#'
#' The \emph{Outcome Equation} determines the outcome for \emph{observed} matches. The dependent
#' variable can either be continuous or binary, depending on the value of the \code{binary}
#' argument. In the binary case, the dependent variable \eqn{R} is determined by a threshold
#' rule for the latent variable \eqn{Y}.
#' \deqn{ \begin{array}{lcl}
#' R &= & 1[Y > c] \\
#' Y &= & X\alpha + \epsilon
#' \end{array}
#' }{ R = 1[Y > c] with Y = X\alpha + \epsilon
#' }
#' Here, \eqn{Y} is a linear function of \eqn{X}, a matrix of characteristics for \emph{observed}
#' matches, and \eqn{\epsilon}, a vector of random errors. \eqn{\alpha} is a parameter vector to
#' be estimated.
#'
#' The structural model imposes a linear relationship between the error terms of both equations
#' as \eqn{\epsilon = \kappa\eta + \nu}, where \eqn{\nu} is a vector of random errors and \eqn{\kappa}
#' is the covariance parameter to be estimated. If \eqn{\kappa} were zero, the error terms
#' \eqn{\epsilon} and \eqn{\eta} would be independent and the selection problem would vanish.
#' That is, the observed outcomes would be a random sample from the population of interest.
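#'
#' A minimal simulation sketch of this error structure (illustrative only; the sample size
#' and parameter values below are hypothetical):
#' \preformatted{
#' set.seed(123)
#' eta     <- rnorm(1000)      # selection equation error
#' nu      <- rnorm(1000)      # idiosyncratic outcome error
#' kappa   <- 1                # covariance parameter
#' epsilon <- kappa*eta + nu   # outcome equation error
#' cor(epsilon, eta)           # non-zero unless kappa = 0
#' }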
#'
#' @param OUT data frame with characteristics of all observed matches, including
#' market identifier \code{m.id}, college identifier \code{c.id} and student identifier \code{s.id}.
#' @param SEL optional: data frame with characteristics of all observed and unobserved matches, including
#' market identifier \code{m.id}, college identifier \code{c.id} and student identifier \code{s.id}.
#' @param colleges character vector of variable names for college characteristics. Each of these variables takes the same value for all matches of a given college.
#' @param students character vector of variable names for student characteristics. Each of these variables takes the same value for all matches of a given student.
#' @param outcome formula for match outcomes.
#' @param selection formula for match valuations.
#' @param binary logical: if \code{TRUE} outcome variable is taken to be binary; if \code{FALSE} outcome variable is taken to be continuous.
#' @param niter number of iterations to use for the Gibbs sampler.
#' @param gPrior logical: if \code{TRUE} the g-prior (Zellner, 1986) is used for the variance-covariance matrix. (Not yet implemented)
#' @param censored censoring applied to draws of the \code{kappa} parameter, which captures the covariance between the error terms of the selection and outcome equations: 0 = not censored, 1 = censored from below, 2 = censored from above.
#' @param thin integer indicating the level of thinning in the MCMC draws. The default \code{thin=1} saves every draw, \code{thin=2} every second, etc.
#' @param nCores number of cores to be used in parallel Gibbs sampling.
#' @param ... additional arguments (currently not used).
#'
# @param selection.college formula for match valuations of colleges. Is ignored when \code{selection} is provided.
# @param selection.student formula for match valuations of students. Is ignored when \code{selection} is provided.
# @param s.prefs list of matrices (one element for each market) of dimension \code{nColleges} \code{x} \code{nStudents} with the \code{j}th
# column containing student \code{j}'s ranking over colleges in decreasing order of
# preference (i.e. most preferred first).
# @param c.prefs list of matrices (one element for each market) of dimension \code{nStudents} \code{x} \code{nColleges} with the \code{i}th
# column containing college \code{i}'s ranking over students in decreasing order of
# preference (i.e. most preferred first).
# @param SELs optional: same as \code{SEL} but for student valuation when estimating separate selection equations for college and student preferences. Is ignored when \code{SEL} is provided.
# @param SELc optional: same as \code{SEL} but for college valuation when estimating separate selection equations for college and student preferences. Is ignored when \code{SEL} is provided.
#'
#' @export
#'
#' @useDynLib matchingMarkets, .registration = TRUE
#'
#' @import stats lattice parallel RcppProgress
#' @importFrom Rcpp evalCpp
#' @importFrom graphics par plot
#'
#' @author Thilo Klein
#'
#' @keywords regression
#'
#' @references Sorensen, M. (2007). How Smart is Smart Money? A Two-Sided Matching Model of Venture Capital.
#' \emph{Journal of Finance}, 62 (6): 2725-2762.
#'
#' @examples
#' \dontrun{
#' ## --- SIMULATED EXAMPLE ---
#'
#' ## 1. Simulate two-sided matching data for 20 markets (m=20) with 100 students
#' ## (nStudents=100) per market and 20 colleges with quotas of 5 students, each
#' ## (nSlots=rep(5,20)). True parameters in selection and outcome equations are
#' ## all equal to 1.
#'
#' xdata <- stabsim2(m=20, nStudents=100, nSlots=rep(5,20), verbose=FALSE,
#' colleges = "c1", students = "s1",
#' outcome = ~ c1:s1 + eta + nu,
#' selection = ~ -1 + c1:s1 + eta
#' )
#' head(xdata$OUT)
#'
#'
#' ## 2. Correction for sorting bias when match valuations V are observed
#'
#' ## 2-a. Bias from sorting
#' lm1 <- lm(y ~ c1:s1, data=xdata$OUT)
#' summary(lm1)
#'
#' ## 2-b. Cause of the bias
#' with(xdata$OUT, cor(c1*s1, eta))
#'
#' ## 2-c. Correction for sorting bias
#' lm2a <- lm(V ~ -1 + c1:s1, data=xdata$SEL); summary(lm2a)
#' etahat <- lm2a$residuals[xdata$SEL$D==1]
#'
#' lm2b <- lm(y ~ c1:s1 + etahat, data=xdata$OUT)
#' summary(lm2b)
#'
#'
#' ## 3. Correction for sorting bias when match valuations V are unobserved
#'
#' ## 3-a. Run Gibbs sampler (when SEL is given)
#' fit2 <- stabit2(OUT = xdata$OUT,
#' SEL = xdata$SEL,
#' outcome = y ~ c1:s1,
#' selection = ~ -1 + c1:s1,
#' niter=1000
#' )
#'
#' ## 3-b. Alternatively: Run Gibbs sampler (when SEL is not given)
#' fit2 <- stabit2(OUT = xdata$OUT,
#' colleges = "c1",
#' students = "s1",
#' outcome = y ~ c1:s1,
#' selection = ~ -1 + c1:s1,
#' niter=1000
#' )
#'
#'
#' ## 4. Implemented methods
#'
#' ## 4-a. Get coefficients
#' fit2
#'
#' ## 4-b. Coefficient table
#' summary(fit2)
#'
#' ## 4-c. Get marginal effects
#' summary(fit2, mfx=TRUE)
#'
#' ## 4-d. Also try the following functions
#' #coef(fit2)
#' #fitted(fit2)
#' #residuals(fit2)
#' #predict(fit2, newdata=NULL)
#'
#'
#' ## 5. Plot MCMC draws for coefficients
#' plot(fit2)
#' }
stabit2 <- function(OUT=NULL, SEL=NULL, colleges=NULL, students=NULL, outcome=NULL, selection,
binary=FALSE, niter, gPrior=FALSE,
censored=1, thin=1, nCores=max(1,detectCores()-1), ...) UseMethod("stabit2")
#' @export
stabit2.default <- function(OUT=NULL, SEL=NULL, colleges=NULL, students=NULL, outcome=NULL, selection,
binary=FALSE, niter, gPrior=FALSE,
censored=1, thin=1, nCores=max(1,detectCores()-1), ...){
## ------------------------
## --- 1. Preliminaries ---
## select method based on arguments provided
if(is.list(selection)){
method <- "Klein" # separate selection equations for students and college preferences
selection.college <- selection$college
selection.student <- selection$student
selection <- NULL
if(is.null(OUT)){
method <- "Klein-selection" # no outcome equation
OUT <- SEL$SELs[SEL$SELs$D==1,]
OUT$Y <- rep(0,nrow(OUT))
}
} else{
method <- "Sorensen" # single selection equation with equal sharing rule for student and college utility
}
s.prefs <- NULL
c.prefs <- NULL
if(!is.data.frame(SEL)){
SELs <- SEL$SELs
SELc <- SEL$SELc
s.prefs <- SEL$s.prefs
c.prefs <- SEL$c.prefs
SEL <- NULL
}
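## note: SEL can either be a single data frame (one selection equation) or a list with
## elements SELs, SELc and optional preference lists s.prefs, c.prefs (separate selection
## equations for students and colleges)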
## split input data by market
OUT <- split(OUT, OUT$m.id)
if(!is.null(SEL)){
SEL <- split(SEL, SEL$m.id)
SELs <- NULL
SELc <- NULL
} else if(!is.null(SELs) & !is.null(SELc)){
SELs <- split(SELs, SELs$m.id)
SELc <- split(SELc, SELc$m.id)
}
## number of cores need not exceed number of markets
nCores <- min(length(OUT), nCores)
## market and agent identifiers
m.id <- "m.id"
c.id <- "c.id"
s.id <- "s.id"
## empty lists for results
Y=list(); Xmatch=list(); C=list(); Cmatch=list(); S=list(); Smatch=list(); D=list(); d=list()
M=list(); H=list(); indices=list()
c.better=list(); c.worse=list(); s.better=list(); s.worse=list()
c.betterNA=list(); c.worseNA=list(); s.betterNA=list(); s.worseNA=list()
for(i in 1:length(OUT)){
## ------------------------------------------------------------------------------------------
## --- 2. Bring data in consistent order and produce matrices for all equilibrium matches ---
X <- stabit2_inner(iter=i, OUT=OUT[[i]], SEL=SEL[[i]], SELs=SELs[[i]], SELc=SELc[[i]],
colleges=colleges, students=students, outcome=outcome, selection=selection,
selection.student=selection.student, selection.college=selection.college,
s.prefs=s.prefs[[i]], c.prefs=c.prefs[[i]],
method=method)
## continue to write the results per market
Y[[i]] <- as.matrix(X$Y)
Xmatch[[i]] <- as.matrix(X$Xmatch)
C[[i]] <- as.matrix(X$C)
Cmatch[[i]] <- as.matrix(X$Cmatch)
D[[i]] <- as.matrix(X$D)
d[[i]] <- as.matrix(X$d) - 1
M[[i]] <- as.matrix(X$M) - 1
indices[[i]] <- X$indices
if(method=="Klein" | method=="Klein-selection"){
S[[i]] <- as.matrix(X$S)
Smatch[[i]] <- as.matrix(X$Smatch)
H[[i]] <- X$H
} else{
H[[i]] <- as.matrix(X$H)
}
c.better[[i]] <- X$c.better
c.worse[[i]] <- X$c.worse
s.better[[i]] <- X$s.better
s.worse[[i]] <- X$s.worse
c.betterNA[[i]] <- X$c.betterNA
c.worseNA[[i]] <- X$c.worseNA
s.betterNA[[i]] <- X$s.betterNA
s.worseNA[[i]] <- X$s.worseNA
}
## some preliminaries
T <- length(Y); #// Number of markets.
nColleges <- nStudents <- XXmatch <- CC <- SS <- CCmatch <- SSmatch <- list()
L <- rep(list(vector()),T)
studentIds <- collegeId <- rep(list(vector()),T)
## further results
s <- 0
for(i in 1:T){
nColleges[[i]] <- nrow(H[[i]])
nStudents[[i]] <- ncol(H[[i]])
XXmatch[[i]] <- t(Xmatch[[i]]) %*% Xmatch[[i]]
CC[[i]] <- t(C[[i]]) %*% C[[i]]
CCmatch[[i]] <- t(Cmatch[[i]]) %*% Cmatch[[i]]
if(method=="Klein" | method=="Klein-selection"){
SSmatch[[i]] <- t(Smatch[[i]]) %*% Smatch[[i]]
SS[[i]] <- t(S[[i]]) %*% S[[i]]
#collegeId[[i]] <- matrix(NA, nrow=nStudents[[i]], ncol=dim(H[[i]])[3]) ##
}
## Record the id's of students matched to each college, and the id of the college matched to each student.
for(j in 1:nColleges[[i]]){
s <- s+1
L[[i]][j] <- s - 1
studentIds[[s]] <- which(H[[i]][j,] == 1) - 1
}
for(j in 1:nStudents[[i]]){
collegeId[[i]][j] <- which(H[[i]][,j] == 1) - 1
}
}
n <- sum(unlist(nStudents)) # total number of students/matches
N <- sum(unlist(nColleges)) # total number of colleges
## ----------------------------
## --- 3. Run Gibbs sampler ---
#save.image("~/Desktop/play.RData")
#load("~/Desktop/play.RData")
if(nCores > 1){
## split market ids (Ts) and Ns evenly to the number of cores (nCores)
Ts <- split(1:T, cut(1:T, breaks=nCores, labels=FALSE))
Ns <- list()
for(i in 1:length(Ts)){
if(i==1){
Ns[[i]] <- 1:sum(unlist(nColleges[ Ts[[i]] ]))
} else{
Ns[[i]] <- max(Ns[[i-1]]) + ( 1:sum(unlist(nColleges[ Ts[[i]] ])) )
L[ Ts[[i]] ] <- lapply(L[ Ts[[i]] ], function(z){
z - min(unlist(L[ Ts[[i]] ]))
})
}
}
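## the L offsets of each core's markets are re-based to start at zero, so that every
## worker indexes only its own block of studentIds (studentIds[Ns[[i]]])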
## setup cluster
cl <- makeCluster(nCores)
clusterEvalQ(cl, library(matchingMarkets))
if(method == "Klein" & (!is.null(s.prefs) & !is.null(c.prefs)) ){
## split
parObject <- lapply(1:nCores, function(i){
list(Y=Y[Ts[[i]]], Xmatch=Xmatch[Ts[[i]]], C=C[Ts[[i]]], Cmatch=Cmatch[Ts[[i]]],
S=S[Ts[[i]]], Smatch=Smatch[Ts[[i]]], D=D[Ts[[i]]], d=d[Ts[[i]]], M=M[Ts[[i]]],
H=H[Ts[[i]]], nColleges=unlist(nColleges[Ts[[i]]]), nStudents=unlist(nStudents[Ts[[i]]]),
XXmatch=XXmatch[Ts[[i]]], CC=CC[Ts[[i]]], SS=SS[Ts[[i]]], SSmatch=SSmatch[Ts[[i]]],
CCmatch=CCmatch[Ts[[i]]], L=L[Ts[[i]]], studentIds=studentIds[Ns[[i]]],
collegeId=collegeId[Ts[[i]]], n=sum(unlist(nStudents[Ts[[i]]])), N=sum(unlist(nColleges[Ts[[i]]])),
binary=binary, niter=niter, thin=thin, T=length(Ts[[i]]), censored=censored,
c.better=c.better[Ts[[i]]], c.worse=c.worse[Ts[[i]]], s.better=s.better[Ts[[i]]], s.worse=s.worse[Ts[[i]]],
c.betterNA=c.betterNA[Ts[[i]]], c.worseNA=c.worseNA[Ts[[i]]], s.betterNA=s.betterNA[Ts[[i]]], s.worseNA=s.worseNA[Ts[[i]]])
})
cat(paste("Running parallel Gibbs sampler on", nCores, "cores...", sep=" "))
res <- parLapply(cl, parObject, function(d){
with(d, stabit2Sel0(Yr=Y, Xmatchr=Xmatch, Cr=C, Cmatchr=Cmatch,
Sr=S, Smatchr=Smatch, Dr=D, dr=d, Mr=M,
Hr=H, nCollegesr=nColleges, nStudentsr=nStudents,
XXmatchr=XXmatch, CCr=CC, SSr=SS, SSmatchr=SSmatch,
CCmatchr=CCmatch, Lr=L, studentIdsr=studentIds,
collegeIdr=collegeId, n=n, N=N,
binary=binary, niter=niter, thin=thin, T=T, censored=censored,
cbetterr=c.better, cworser=c.worse, sbetterr=s.better, sworser=s.worse,
cbetterNAr=c.betterNA, cworseNAr=c.worseNA, sbetterNAr=s.betterNA, sworseNAr=s.worseNA))
})
} else if(method == "Klein" & (is.null(s.prefs) | is.null(c.prefs)) ){
## split
parObject <- lapply(1:nCores, function(i){
list(Y=Y[Ts[[i]]], Xmatch=Xmatch[Ts[[i]]], C=C[Ts[[i]]], Cmatch=Cmatch[Ts[[i]]],
S=S[Ts[[i]]], Smatch=Smatch[Ts[[i]]], D=D[Ts[[i]]], d=d[Ts[[i]]], M=M[Ts[[i]]],
H=H[Ts[[i]]], nColleges=unlist(nColleges[Ts[[i]]]), nStudents=unlist(nStudents[Ts[[i]]]),
XXmatch=XXmatch[Ts[[i]]], CC=CC[Ts[[i]]], SS=SS[Ts[[i]]], SSmatch=SSmatch[Ts[[i]]],
CCmatch=CCmatch[Ts[[i]]], L=L[Ts[[i]]], studentIds=studentIds[Ns[[i]]],
collegeId=collegeId[Ts[[i]]], n=sum(unlist(nStudents[Ts[[i]]])), N=sum(unlist(nColleges[Ts[[i]]])),
binary=binary, niter=niter, thin=thin, T=length(Ts[[i]]), censored=censored)
})
cat(paste("Running parallel Gibbs sampler on", nCores, "cores...", sep=" "))
res <- parLapply(cl, parObject, function(d){
with(d, stabit2Sel1(Yr=Y, Xmatchr=Xmatch, Cr=C, Cmatchr=Cmatch,
Sr=S, Smatchr=Smatch, Dr=D, dr=d, Mr=M,
Hr=H, nCollegesr=nColleges, nStudentsr=nStudents,
XXmatchr=XXmatch, CCr=CC, SSr=SS, SSmatchr=SSmatch,
CCmatchr=CCmatch, Lr=L, studentIdsr=studentIds,
collegeIdr=collegeId, n=n, N=N,
binary=binary, niter=niter, thin=thin, T=T, censored=censored))
})
} else if(method == "Klein-selection" & (!is.null(s.prefs) & !is.null(c.prefs)) ){
## split
parObject <- lapply(1:nCores, function(i){
list(C=C[Ts[[i]]], Cmatch=Cmatch[Ts[[i]]], S=S[Ts[[i]]], Smatch=Smatch[Ts[[i]]], D=D[Ts[[i]]],
d=d[Ts[[i]]], M=M[Ts[[i]]], H=H[Ts[[i]]], nColleges=unlist(nColleges[Ts[[i]]]),
nStudents=unlist(nStudents[Ts[[i]]]), CC=CC[Ts[[i]]], SS=SS[Ts[[i]]],
SSmatch=SSmatch[Ts[[i]]], CCmatch=CCmatch[Ts[[i]]], L=L[Ts[[i]]],
studentIds=studentIds[Ns[[i]]], collegeId=collegeId[Ts[[i]]], n=sum(unlist(nStudents[Ts[[i]]])),
N=sum(unlist(nColleges[Ts[[i]]])), niter=niter, thin=thin, T=length(Ts[[i]]),
c.better=c.better[Ts[[i]]], c.worse=c.worse[Ts[[i]]], s.better=s.better[Ts[[i]]], s.worse=s.worse[Ts[[i]]],
c.betterNA=c.betterNA[Ts[[i]]], c.worseNA=c.worseNA[Ts[[i]]], s.betterNA=s.betterNA[Ts[[i]]], s.worseNA=s.worseNA[Ts[[i]]])
})
cat(paste("Running parallel Gibbs sampler on", nCores, "cores...", sep=" "))
res <- parLapply(cl, parObject, function(d){
with(d, stabit2Mat0(Cr=C, Cmatchr=Cmatch, Sr=S, Smatchr=Smatch, Dr=D,
dr=d, Mr=M, Hr=H, nCollegesr=nColleges,
nStudentsr=nStudents, CCr=CC, SSr=SS,
SSmatchr=SSmatch, CCmatchr=CCmatch, Lr=L,
studentIdsr=studentIds, collegeIdr=collegeId, n=n,
N=N, niter=niter, thin=thin, T=T,
cbetterr=c.better, cworser=c.worse, sbetterr=s.better, sworser=s.worse,
cbetterNAr=c.betterNA, cworseNAr=c.worseNA, sbetterNAr=s.betterNA, sworseNAr=s.worseNA))
})
} else if(method == "Klein-selection" & (is.null(s.prefs) | is.null(c.prefs)) ){
## split
parObject <- lapply(1:nCores, function(i){
list(C=C[Ts[[i]]], Cmatch=Cmatch[Ts[[i]]], S=S[Ts[[i]]], Smatch=Smatch[Ts[[i]]], D=D[Ts[[i]]],
d=d[Ts[[i]]], M=M[Ts[[i]]], H=H[Ts[[i]]], nColleges=unlist(nColleges[Ts[[i]]]),
nStudents=unlist(nStudents[Ts[[i]]]), CC=CC[Ts[[i]]], SS=SS[Ts[[i]]],
SSmatch=SSmatch[Ts[[i]]], CCmatch=CCmatch[Ts[[i]]], L=L[Ts[[i]]],
studentIds=studentIds[Ns[[i]]], collegeId=collegeId[Ts[[i]]], n=sum(unlist(nStudents[Ts[[i]]])),
N=sum(unlist(nColleges[Ts[[i]]])), niter=niter, thin=thin, T=length(Ts[[i]]))
})
cat(paste("Running parallel Gibbs sampler on", nCores, "cores...", sep=" "))
res <- parLapply(cl, parObject, function(d){
with(d, stabit2Mat1(Cr=C, Cmatchr=Cmatch, Sr=S, Smatchr=Smatch, Dr=D,
dr=d, Mr=M, Hr=H, nCollegesr=nColleges,
nStudentsr=nStudents, CCr=CC, SSr=SS,
SSmatchr=SSmatch, CCmatchr=CCmatch, Lr=L,
studentIdsr=studentIds, collegeIdr=collegeId, n=n,
N=N, niter=niter, thin=thin, T=T))
})
} else if(method == "Sorensen"){
## split
parObject <- lapply(1:nCores, function(i){
list(Y=Y[Ts[[i]]], Xmatch=Xmatch[Ts[[i]]], C=C[Ts[[i]]], Cmatch=Cmatch[Ts[[i]]],
D=D[Ts[[i]]], d=d[Ts[[i]]], M=M[Ts[[i]]], H=H[Ts[[i]]],
nColleges=unlist(nColleges[Ts[[i]]]), nStudents=unlist(nStudents[Ts[[i]]]),
XXmatch=XXmatch[Ts[[i]]], CC=CC[Ts[[i]]], CCmatch=CCmatch[Ts[[i]]], L=L[Ts[[i]]],
studentIds=studentIds[Ns[[i]]], collegeId=collegeId[Ts[[i]]],
n=sum(unlist(nStudents[Ts[[i]]])), N=sum(unlist(nColleges[Ts[[i]]])),
binary=binary, niter=niter, thin=thin, T=length(Ts[[i]]), censored=censored)
})
cat(paste("Running parallel Gibbs sampler on", nCores, "cores...", sep=" "))
res <- parLapply(cl, parObject, function(d){
with(d, stabit2Sel2(Yr=Y, Xmatchr=Xmatch, Cr=C, Cmatchr=Cmatch,
Dr=D, dr=d, Mr=M, Hr=H,
nCollegesr=nColleges, nStudentsr=nStudents,
XXmatchr=XXmatch, CCr=CC, CCmatchr=CCmatch, Lr=L,
studentIdsr=studentIds, collegeIdr=collegeId,
n=n, N=N,
binary=binary, niter=niter, thin=thin, T=T, censored=censored))
})
}
rm(parObject, Ts, Ns)
stopCluster(cl)
## split results into draws for parameters (res1) and error terms (res2)
res1 <- lapply(res, function(z) z[!names(z) %in% c("etadraws", "deltadraws")])
res2 <- lapply(res, function(z) z[names(z) %in% c("etadraws", "deltadraws")])
rm(res)
## combine results from different cores for error terms
est2 <- lapply(1:length(res2[[1]]), function(z){
do.call(rbind, lapply(res2, function(d) d[[z]] ))
})
names(est2) <- names(res2[[1]]); rm(res2)
## combine results from different cores for parameter draws
toarray <- function(x, y){
array(unlist(lapply(x, function(z) z[[y]] )),
dim = c(nrow(res1[[1]][[y]]), ncol(res1[[1]][[y]]), length(res1)) )
}
est1 <- lapply(1:length(res1[[1]]), function(z){
toarray( res1, names(res1[[1]])[z] )
})
names(est1) <- names(res1[[1]]); rm(res1)
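## combine the per-core sub-posterior draws into a single consensus chain (see consensusMC below)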
est1 <- lapply(est1, consensusMC)
## combine results for parameters (est1) and error terms (est2)
est <- c(est1, est2); rm(est1, est2)
} else{
#Ts <- 1:T
#Ns <- 1:N
if(method=="Klein" & !is.null(s.prefs) & !is.null(c.prefs) ){
est <- stabit2Sel0(Yr=Y, Xmatchr=Xmatch, Cr=C, Cmatchr=Cmatch, Sr=S, Smatchr=Smatch, Dr=D, dr=d,
Mr=M, Hr=H, nCollegesr=unlist(nColleges), nStudentsr=unlist(nStudents), XXmatchr=XXmatch,
CCr=CC, SSr=SS, SSmatchr=SSmatch, CCmatchr=CCmatch, Lr=L,
studentIdsr=studentIds, collegeIdr=collegeId, n=n, N=N, binary=binary, niter=niter,
thin=thin, T=T, censored=censored,
cbetterr=c.better, cworser=c.worse, sbetterr=s.better, sworser=s.worse,
cbetterNAr=c.betterNA, cworseNAr=c.worseNA, sbetterNAr=s.betterNA, sworseNAr=s.worseNA)
} else if(method=="Klein" & (is.null(s.prefs) | is.null(c.prefs)) ){
est <- stabit2Sel1(Yr=Y, Xmatchr=Xmatch, Cr=C, Cmatchr=Cmatch, Sr=S, Smatchr=Smatch, Dr=D, dr=d,
Mr=M, Hr=H, nCollegesr=unlist(nColleges), nStudentsr=unlist(nStudents), XXmatchr=XXmatch,
CCr=CC, SSr=SS, SSmatchr=SSmatch, CCmatchr=CCmatch, Lr=L,
studentIdsr=studentIds, collegeIdr=collegeId, n=n, N=N, binary=binary, niter=niter,
thin=thin, T=T, censored=censored)
} else if(method=="Klein-selection" & !is.null(s.prefs) & !is.null(c.prefs) ){
est <- stabit2Mat0(Cr=C, Cmatchr=Cmatch, Sr=S, Smatchr=Smatch, Dr=D, dr=d,
Mr=M, Hr=H, nCollegesr=unlist(nColleges), nStudentsr=unlist(nStudents),
CCr=CC, SSr=SS, SSmatchr=SSmatch, CCmatchr=CCmatch, Lr=L,
studentIdsr=studentIds, collegeIdr=collegeId, n=n, N=N, niter=niter, thin=thin, T=T,
cbetterr=c.better, cworser=c.worse, sbetterr=s.better, sworser=s.worse,
cbetterNAr=c.betterNA, cworseNAr=c.worseNA, sbetterNAr=s.betterNA, sworseNAr=s.worseNA)
} else if(method=="Klein-selection" & (is.null(s.prefs) | is.null(c.prefs)) ){
est <- stabit2Mat1(Cr=C, Cmatchr=Cmatch, Sr=S, Smatchr=Smatch, Dr=D, dr=d,
Mr=M, Hr=H, nCollegesr=unlist(nColleges), nStudentsr=unlist(nStudents),
CCr=CC, SSr=SS, SSmatchr=SSmatch, CCmatchr=CCmatch, Lr=L,
studentIdsr=studentIds, collegeIdr=collegeId, n=n, N=N, niter=niter, thin=thin, T=T)
} else if(method=="Sorensen"){
est <- stabit2Sel2(Yr=Y, Xmatchr=Xmatch, Cr=C, Cmatchr=Cmatch, Dr=D, dr=d,
Mr=M, Hr=H, nCollegesr=unlist(nColleges), nStudentsr=unlist(nStudents), XXmatchr=XXmatch,
CCr=CC, CCmatchr=CCmatch, Lr=L,
studentIdsr=studentIds, collegeIdr=collegeId,
n=n, N=N, binary=binary, niter=niter, thin=thin, T=T, censored=censored)
}
}
# ------------------------------------
# --- 5. Add names to coefficients ---
## variable names
if(method!="Klein-selection"){
an <- colnames(Xmatch[[1]])
}
bn <- colnames(C[[1]])
if(method=="Klein" | method=="Klein-selection"){
cn <- colnames(S[[1]])
}
## parameter draws
rownames(est$betadraws) <- bn
if(method=="Sorensen"){
est$alphadraws <- with(est, rbind(alphadraws, kappadraws))
rownames(est$alphadraws) <- c(an,"kappa")
est$kappadraws <- NULL
} else if(method=="Klein"){
est$alphadraws <- with(est, rbind(alphadraws, kappadraws, lambdadraws))
rownames(est$alphadraws) <- c(an,"kappa","lambda")
est$kappadraws <- NULL
est$lambdadraws <- NULL
rownames(est$gammadraws) = cn
} else if(method=="Klein-selection"){
rownames(est$gammadraws) = cn
}
## posterior means
## The last half of all draws is used to approximate the posterior means and the posterior standard deviations.
niter <- dim(est$betadraws)[2]
startiter <- floor(niter/2)
posMeans <- function(x){
sapply(1:nrow(x), function(z) mean(x[z,startiter:niter]))
}
if(method!="Klein-selection"){
est$alpha <- posMeans(est$alphadraws)
names(est$alpha) <- rownames(est$alphadraws)
}
est$beta <- posMeans(est$betadraws)
names(est$beta) <- bn
if(method=="Klein" | method=="Klein-selection"){
est$gamma <- posMeans(est$gammadraws)
names(est$gamma) <- cn
}
if(method!="Klein-selection" & binary==FALSE){
est$sigma <- sqrt(posMeans(est$sigmasquarenudraws))
est$sigmasquarenudraws <- NULL
}
## error terms
est$eta <- posMeans(est$etadraws)
est$etadraws <- NULL
if(method=="Klein" | method=="Klein-selection"){
est$delta <- posMeans(est$deltadraws)
est$deltadraws <- NULL
}
## vcov
if(method!="Klein-selection"){
est$vcov$alpha <- var(t(est$alphadraws))
}
est$vcov$beta <- var(t(est$betadraws))
if(method=="Klein" | method=="Klein-selection"){
est$vcov$gamma <- var(t(est$gammadraws))
}
## variables
est$C <- do.call(rbind, C)
est$D <- do.call(c, D)
if(method!="Klein-selection"){
est$Y <- do.call(c, Y)
est$X <- do.call(rbind, Xmatch)
if(method=="Klein"){
est$X <- cbind(est$X, eta=est$eta, delta=est$delta)
est$S <- do.call(rbind, S)
} else if(method=="Sorensen"){
est$X <- cbind(est$X, eta=est$eta)
}
} else if(method=="Klein-selection"){
est$S <- do.call(rbind, S)
}
## other output
if(method=="Sorensen"){
est$coefficients <- unlist(with(est, list(o=alpha, s=beta)))
} else if(method=="Klein"){
est$coefficients <- unlist(with(est, list(o=alpha, c=beta, s=gamma)))
} else if(method=="Klein-selection"){
est$coefficients <- unlist(with(est, list(c=beta, s=gamma)))
}
if(method!="Klein-selection"){
est$fitted.values <- with(est, as.vector(X %*% alpha))
est$residuals <- est$Y - est$fitted.values
est$df <- n - ncol(est$X)
est$binary <- binary
est$formula <- outcome
} else{
#est$fitted.values <- with(est, as.vector(X %*% alpha))
#est$residuals <- est$Y - est$fitted.values
est$df <- n*N - ncol(est$C) - ncol(est$S)
#est$binary <- binary
#est$formula <- outcome
}
est$call <- match.call()
est$method <- method
## consolidate
h <- est[grep(pattern="draws",x=names(est))]
est[grep(pattern="draws",x=names(est))] <- NULL
est$draws <- h
h <- est[which(names(est) %in% c("alpha","beta","gamma","kappa","lambda"))]
est[which(names(est) %in% c("alpha","beta","gamma","kappa","lambda"))] <- NULL
est$coefs <- h
h <- est[which(names(est) %in% c("X","S","C","Y","D"))]
est[which(names(est) %in% c("X","S","C","Y","D"))] <- NULL
est$variables <- h
h <- NULL
# ------------------
# --- 6. Returns ---
class(est) <- "stabit2"
return(est)
}
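## ----------------------------------------------------------------------------
## rFormula: helper that builds the model frame for a formula and returns a data
## frame with the response (if any) in the first column, followed by the columns
## of the design matrix. Illustrative call (hypothetical data):
## rFormula(y ~ -1 + c1:s1, data = xdata$OUT)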
rFormula <- function(formula, data=list(), ...){
mf <- model.frame(formula=formula, data=data)
x <- model.matrix(attr(mf, "terms"), data=mf)
y <- model.response(mf)
as.data.frame(cbind(y,x))
}
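## ----------------------------------------------------------------------------
## consensusMC: combines MCMC draws from M parallel sub-chains (an array of
## dimension d x sampletotT x M) into one chain by weighting each sub-chain draw
## with the inverse of its sub-chain covariance matrix:
## theta_t = (sum_s W_s)^(-1) * sum_s W_s theta_{s,t}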
consensusMC <- function(subchain){
ddata = length(dim(subchain))
d <- dim(subchain)[1]
sampletotT <- dim(subchain)[2]
M <- dim(subchain)[3]
if ( M==1 ){
theta <- array(subchain[,,1],c(d,sampletotT))
return (theta)
}
## compute sigmahatm & sigmahatm.inverse (= W_s)
sigmahatm <- array(NA,c(d,d,M))
sigmahatM <- matrix(NA,d,d)
sigmahatM.pre <- matrix(NA,d,d)
if (d==1){
for (k in 1:M) { sigmahatm[1,,k] <- var(subchain[1,,k]) }
} else{
for (k in 1:M) { sigmahatm[,,k] <- cov(t(subchain[,,k])) }
}
## compute inverses of covariance matrices (with try()):
sigmahatm.inverse <- array(NA,dim=c(d,d,M))
for (k in 1:M){
res <- try( sigmahatm.inverse[,,k] <- solve(sigmahatm[,,k]), silent=TRUE)
}
sigmahatM <- solve(rowSums(sigmahatm.inverse, dims=2)) # (sum W_s)^(-1)
## compute unified posterior samples
theta <- matrix(NA,nrow=d,ncol=sampletotT) # the resulting posterior samples
wvec <- array(NA, c(d,1))
for (i in 1:sampletotT){
wvec <- rep(0,d)
for (s in 1:M){
wvec <- wvec + sigmahatm.inverse[,,s] %*% subchain[,i,s]
}
theta[,i] <- sigmahatM %*% wvec
}
return(theta)
}
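## ----------------------------------------------------------------------------
## stabit2_inner: prepares the data for one market -- sorts the data so that
## observed (equilibrium) matches come first, builds the match indicator matrix H
## and index matrix M, derives rank-order bounds when preference lists are given,
## and constructs the outcome and selection design matrices from the formulas.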
stabit2_inner <- function(iter, OUT, SEL, SELs, SELc, colleges, students,
m.id="m.id", c.id="c.id", s.id="s.id",
outcome, selection, selection.student, selection.college,
s.prefs, c.prefs, method){
## ----------------------------------------------------------------------------------------------
## --- 1. Sort datasets by colleges (c.id) and students (s.id), put equilibrium matches first ---
if(!is.null(SEL)){
## sort SEL by c.id and s.id (based on OUT)
idOUT <- with(OUT, paste(c.id, s.id, sep="_"))
SEL$idSEL <- with(SEL, paste(c.id, s.id, sep="_"))
## which matches in SEL are in OUT?
SEL_top <- SEL[SEL$idSEL %in% idOUT,]
SEL_top <- SEL_top[match(idOUT, SEL_top$idSEL),]
SEL_bot <- SEL[! SEL$idSEL %in% idOUT,]
SEL <- rbind(SEL_top, SEL_bot)
rm(SEL_top, SEL_bot)
## unique student and college ids
indices <- SEL
uColleges <- sort(unique(indices$c.id))
uStudents <- sort(unique(indices$s.id))
} else if(!is.null(SELs) & !is.null(SELc)){
## sort SELs/SELc by c.id and s.id (based on OUT)
idOUT <- with(OUT, paste(c.id, s.id, sep="_"))
SELs$idSEL <- with(SELs, paste(c.id, s.id, sep="_"))
SELc$idSEL <- with(SELc, paste(c.id, s.id, sep="_"))
## which matches in SELs are in OUT?
SEL_top <- SELs[SELs$idSEL %in% idOUT,]
SEL_top <- SEL_top[match(idOUT, SEL_top$idSEL),]
SEL_bot <- SELs[! SELs$idSEL %in% idOUT,]
SELs <- rbind(SEL_top, SEL_bot)
rm(SEL_top, SEL_bot)
## which matches in SELc are in OUT?
SEL_top <- SELc[SELc$idSEL %in% idOUT,]
SEL_top <- SEL_top[match(idOUT, SEL_top$idSEL),]
SEL_bot <- SELc[! SELc$idSEL %in% idOUT,]
SELc <- rbind(SEL_top, SEL_bot)
rm(SEL_top, SEL_bot)
## unique student and college ids
indices <- SELs
uColleges <- sort(unique(indices$c.id))
uStudents <- sort(unique(indices$s.id))
} else{
## get college and student ids from OUT
c.id <- OUT[,c.id]
s.id <- OUT[,s.id]
## unique student and college ids
uColleges <- sort(unique(c.id))
uStudents <- sort(unique(s.id))
## all feasible combinations
combs <- function(uColleges, uStudents){
nColleges <- length(uColleges)
nStudents <- length(uStudents)
data.frame( c.id = c(sapply(uColleges, function(i){ rep(i, nStudents) })),
s.id = rep(uStudents, nColleges),
stringsAsFactors=FALSE )
}
indices <- as.data.frame(combs(uColleges, uStudents))
## index equilibrium matches from x in indices
indices$id <- paste(indices$c.id, indices$s.id, sep="_")
OUT$id <- paste(OUT$c.id, OUT$s.id, sep="_")
## sort indices such that observed matches come first
indices$D <- ifelse(indices$id %in% OUT$id, 1, 0)
indices <- indices[order(indices$D, decreasing=TRUE),]
## sort OUT such that it is consistent with indices
OUT <- OUT[match(indices$id[indices$D==1], OUT$id), ]
}
## -----------------------------------------------------------------------------------------
## --- 2. Produce matrices indicating the matches (H) and their position in the data (M) ---
## matrices are of dimension nColleges x nStudents
ind.new <- indices
ind.new$index <- 1:nrow(ind.new)
ind.org <- ind.new[order(ind.new$c.id,ind.new$s.id),]
M <- matrix(ind.org$index, nrow=length(uColleges), ncol=length(uStudents), byrow=TRUE)
H <- matrix(ind.org$D, nrow=length(uColleges), ncol=length(uStudents), byrow=TRUE)
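## H[c,s] == 1 iff the c-th college and s-th student are matched in equilibrium;
## M[c,s] gives the (1-based) row of that pair in `indices`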
## -----------------------------------------------------------------------------------------
## --- 3. Create upper/lower bounds on latent match valuations based on rank order lists ---
if(!is.null(s.prefs) & !is.null(c.prefs)){
cprefs_better <- rbind(NA, c.prefs[-dim(c.prefs)[1],])
cprefs_worse <- rbind(c.prefs[-1,], NA)
sprefs_better <- rbind(NA, s.prefs[-dim(s.prefs)[1],])
sprefs_worse <- rbind(s.prefs[-1,], NA)
mapper <- function(clmn, direction, perspective){
## clmn : column in rank order list (cprefs/sprefs)
## direction : worse or better
## perspective: student or college perspective
if(perspective == "college"){
## creates lookup matrix c.better (c.worse) of dimension CxS such that cell (c,s) gives
## student that college c ranks just above (below) student s.
if(direction == "worse"){
h <- merge(x=data.frame(x=1:max(uStudents)),
y=data.frame(cbind(x=c.prefs[,clmn], y=cprefs_worse[,clmn])),
by="x", all.x=TRUE); h[order(h$x),"y"]
} else{ ## "better"
h <- merge(x=data.frame(x=1:max(uStudents)),
y=data.frame(cbind(x=c.prefs[,clmn], y=cprefs_better[,clmn])),
by="x", all.x=TRUE); h[order(h$x),"y"]
} ## end direction
} else{ ## "student"
## creates lookup matrix s.better (s.worse) of dimension CxS such that cell (c,s) gives
## college that student s ranks just above (below) college c.
if(direction == "worse"){
h <- merge(x=data.frame(x=1:max(uColleges)),
y=data.frame(cbind(x=s.prefs[,clmn], y=sprefs_worse[,clmn])),
by="x", all.x=TRUE); h[order(h$x),"y"]
} else{ ## "better"
h <- merge(x=data.frame(x=1:max(uColleges)),
y=data.frame(cbind(x=s.prefs[,clmn], y=sprefs_better[,clmn])),
by="x", all.x=TRUE); h[order(h$x),"y"]
} ## end direction
} ## end perspective
}
c.worse <- t(sapply(1:ncol(c.prefs), function(x) mapper(clmn=x, direction="worse", perspective="college"))) -1
c.better <- t(sapply(1:ncol(c.prefs), function(x) mapper(clmn=x, direction="better", perspective="college"))) -1
c.worseNA <- c.worse; c.worseNA[!is.na(c.worseNA)] <- 0; c.worseNA[is.na(c.worseNA)] <- 1
c.betterNA <- c.better; c.betterNA[!is.na(c.betterNA)] <- 0; c.betterNA[is.na(c.betterNA)] <- 1
c.worse[is.na(c.worse)] <- 0
c.better[is.na(c.better)] <- 0
s.worse <- sapply(1:ncol(s.prefs), function(x) mapper(clmn=x, direction="worse", perspective="student")) -1
s.better <- sapply(1:ncol(s.prefs), function(x) mapper(clmn=x, direction="better", perspective="student")) -1
s.worseNA <- s.worse; s.worseNA[!is.na(s.worseNA)] <- 0; s.worseNA[is.na(s.worseNA)] <- 1
s.betterNA <- s.better; s.betterNA[!is.na(s.betterNA)] <- 0; s.betterNA[is.na(s.betterNA)] <- 1
s.worse[is.na(s.worse)] <- 0
s.better[is.na(s.better)] <- 0
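## entries flagged with 1 in the *NA matrices have no better/worse-ranked partner
## (the agent was ranked first or last); the corresponding bound entries are set
## to 0 as placeholders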
} else{
s.better <- NULL; c.better <- NULL; s.worse <- NULL; c.worse <- NULL
s.betterNA <- NULL; c.betterNA <- NULL; s.worseNA <- NULL; c.worseNA <- NULL
}
## ---------------------------------------------------------------------
## --- 4. Produce the datasets to be used based on formulas provided ---
if(is.null(SEL) & is.null(SELs) & is.null(SELc)){
c.OUT <- OUT[!duplicated(OUT$c.id),]
C <- data.frame(apply(data.frame(c.OUT[,colleges]), 2, function(i) i[match(indices$c.id,c.OUT$c.id)]))
names(C) <- colleges
s.OUT <- OUT[!duplicated(OUT$s.id),]
S <- data.frame(apply(data.frame(s.OUT[,students]), 2, function(i) i[match(indices$s.id,s.OUT$s.id)]))
names(S) <- students
## ... and interaction effects
Xmain <- data.frame(C, S)
Xmatch <- rFormula(formula = outcome, data=OUT)
if(method=="Klein"){
C <- rFormula(formula = selection.college, data=Xmain)
S <- rFormula(formula = selection.student, data=Xmain)
} else if(method == "Sorensen"){
C <- rFormula(formula = selection, data=Xmain)
}
} else{
if(method!="Klein-selection"){
Xmatch <- rFormula(formula = outcome, data=OUT)
} else{
Xmatch <- rFormula(formula = selection.student, data=OUT)
}
if(method=="Klein" | method=="Klein-selection"){
C <- rFormula(formula = selection.college, data=SELc)
S <- rFormula(formula = selection.student, data=SELs)
} else if(method == "Sorensen"){
C <- rFormula(formula = selection, data=SEL)
}
}
D <- indices$D
d <- which(D==1)
## -----------------------------
## --- 5. Return the results ---
if(method=="Klein" | method=="Klein-selection"){
return( list(Y=Xmatch[,1], Xmatch=Xmatch[,-1], C=C, Cmatch=C[D==1,], S=S, Smatch=S[D==1,],
D=D, d=d, M=M, H=H, indices=indices$id,
c.better=c.better, c.worse=c.worse, s.better=s.better, s.worse=s.worse,
s.betterNA=s.betterNA, c.betterNA=c.betterNA, s.worseNA=s.worseNA, c.worseNA=c.worseNA) )
} else if(method=="Sorensen"){
return( list(Y=Xmatch[,1], Xmatch=Xmatch[,-1], C=C, Cmatch=C[D==1,],
D=D, d=d, M=M, H=H, indices=indices$id) )
}
}