Nothing
# ----------------------------------------------------------------------------
# R-code (www.r-project.org/) for the Structural Matching Model
#
# Copyright (c) 2016 Thilo Klein
#
# This library is distributed under the terms of the GNU Public License (GPL)
# for full details see the file LICENSE
#
# ----------------------------------------------------------------------------
#' @title Matching model and selection correction for college admissions
#'
#' @description The function provides a Gibbs sampler for a structural matching model that
#' estimates preferences and corrects for sample selection bias when the selection process
#' is a two-sided matching game; i.e., a matching of students to colleges.
#'
#' The structural model consists of a selection and an outcome equation. The \emph{Selection Equation}
#' determines which matches are observed (\eqn{D=1}) and which are not (\eqn{D=0}).
#' \deqn{ \begin{array}{lcl}
#' D &= & 1[V \in \Gamma] \\
#' V &= & W\beta + \eta
#' \end{array}
#' }{ D = 1[V in \Gamma] with V = W\beta + \eta
#' }
#' Here, \eqn{V} is a vector of latent valuations of \emph{all feasible} matches, ie observed and
#' unobserved, and \eqn{1[.]} is the Iverson bracket.
#' A match is observed if its match valuation is in the set of valuations \eqn{\Gamma}
#' that satisfy the equilibrium condition (see Sorensen, 2007).
#' The match valuation \eqn{V} is a linear function of \eqn{W}, a matrix of characteristics for
#' \emph{all feasible} matches, and \eqn{\eta}, a vector of random errors. \eqn{\beta} is a paramter
#' vector to be estimated.
#'
#' The \emph{Outcome Equation} determines the outcome for \emph{observed} matches. The dependent
#' variable can either be continuous or binary, dependent on the value of the \code{binary}
#' argument. In the binary case, the dependent variable \eqn{R} is determined by a threshold
#' rule for the latent variable \eqn{Y}.
#' \deqn{ \begin{array}{lcl}
#' R &= & 1[Y > c] \\
#' Y &= & X\alpha + \epsilon
#' \end{array}
#' }{ R = 1[Y > c] with Y = X\alpha + \epsilon
#' }
#' Here, \eqn{Y} is a linear function of \eqn{X}, a matrix of characteristics for \emph{observed}
#' matches, and \eqn{\epsilon}, a vector of random errors. \eqn{\alpha} is a paramter vector to
#' be estimated.
#'
#' The structural model imposes a linear relationship between the error terms of both equations
#' as \eqn{\epsilon = \kappa\eta + \nu}, where \eqn{\nu} is a vector of random errors and \eqn{\kappa}
#' is the covariance paramter to be estimated. If \eqn{\kappa} were zero, the marginal distributions
#' of \eqn{\epsilon} and \eqn{\eta} would be independent and the selection problem would vanish.
#' That is, the observed outcomes would be a random sample from the population of interest.
#'
#' @param OUT data frame with characteristics of all observed matches, including
#' market identifier \code{m.id}, college identifier \code{c.id} and student identifier \code{s.id}.
#' @param SEL optional: data frame with characteristics of all observed and unobserved matches, including
#' market identifier \code{m.id}, college identifier \code{c.id} and student identifier \code{s.id}.
#' @param colleges character vector of variable names for college characteristics. These variables carry the same value for any college.
#' @param students character vector of variable names for student characteristics. These variables carry the same value for any student.
#' @param outcome formula for match outcomes.
#' @param selection formula for match valuations.
#' @param binary logical: if \code{TRUE} outcome variable is taken to be binary; if \code{FALSE} outcome variable is taken to be continuous.
#' @param niter number of iterations to use for the Gibbs sampler.
#' @param gPrior logical: if \code{TRUE} the g-prior (Zellner, 1986) is used for the variance-covariance matrix. (Not yet implemented)
#' @param censored draws of the \code{kappa} parameter that estimates the covariation between the error terms in selection and outcome equation are 0:not censored, 1:censored from below, 2:censored from above.
#' @param thin integer indicating the level of thinning in the MCMC draws. The default \code{thin=1} saves every draw, \code{thin=2} every second, etc.
#' @param nCores number of cores to be used in parallel Gibbs sampling.
#' @param verbose logical. When set to \code{TRUE}, writes information messages on the console (recommended). Defaults to \code{FALSE}, which suppresses such messages.
#' @param ... .
#'
# @param selection.college formula for match valuations of colleges. Is ignored when \code{selection} is provided.
# @param selection.student formula for match valuations of students. Is ignored when \code{selection} is provided.
# @param s.prefs list of matrices (one element for each market) of dimension \code{nColleges} \code{x} \code{nStudents} with the \code{j}th
# column containing student \code{j}'s ranking over colleges in decreasing order of
# preference (i.e. most preferred first).
# @param c.prefs list of matrices (one element for each market) of dimension \code{nStudents} \code{x} \code{nColleges} with the \code{i}th
# column containing college \code{i}'s ranking over students in decreasing order of
# preference (i.e. most preferred first).
# @param SELs optional: same as \code{SEL} but for student valuation when estimating separate selection equations for college and student preferences. Is ignored when \code{SEL} is provided.
# @param SELc optional: same as \code{SEL} but for college valuation when estimating separate selection equations for college and student preferences. Is ignored when \code{SEL} is provided.
#'
#' @export
#'
#' @useDynLib matchingMarkets, .registration = TRUE
#'
#' @import stats lattice parallel RcppProgress
#' @importFrom Rcpp evalCpp
#' @importFrom graphics par plot
#'
#' @return
#' \code{stabit2} returns a list of the estimation results with the following elements.
#' \item{sigma}{numeric scalar: standard deviation fixed to 1.}
#' \item{eta}{numeric vector: residuals of the selection equation.}
#' \item{vcov}{List of variance covariance matrices for coefficients alpha and beta of selection and outcome equations.}
#' \item{coefficients}{numeric vector: coefficients of selection and outcome equations.}
#' \item{fitted.values}{numeric vector: fitted values for outcome data.}
#' \item{residuals}{numeric vector: residuals of the outcome equation.}
#' \item{df}{integer: degrees of freedom.}
#' \item{binary}{logical: if \code{TRUE} outcome variable was taken to be binary; if \code{FALSE} outcome variable was taken to be continuous.}
#' \item{formula}{estimated formula.}
#' \item{call}{function call.}
#' \item{method}{One of "Sorensen", "Klein" or "Klein-selection". Method "Sorensen" is used when a single selection equation is passed. It assumes an equal sharing rule for student and college utility. Method "Klein" is used when two selection equations (one for students, one for schools) and one outcome equations are passed. Method "Klein-selection" only models selection and therefore does not require an outcome equations.}
#' \item{draws}{List of Gibbs sampling draws for alpha and beta coefficients.}
#' \item{coefs}{Posterior means of the Gibbs sampling draws.}
#' \item{variables}{List of data used in the estimation.}
#'
#' @author Thilo Klein
#'
#' @keywords regression
#'
#' @references Sorensen, M. (2007). How Smart is Smart Money? A Two-Sided Matching Model of Venture Capital.
#' \emph{Journal of Finance}, 62 (6): 2725-2762.
#'
#' @examples
#' \donttest{
#' ## --- SIMULATED EXAMPLE ---
#'
#' ## 1. Simulate two-sided matching data for 20 markets (m=20) with 100 students
#' ## (nStudents=100) per market and 20 colleges with quotas of 5 students, each
#' ## (nSlots=rep(5,20)). True parameters in selection and outcome equations are
#' ## all equal to 1.
#'
#' xdata <- stabsim2(m=20, nStudents=100, nSlots=rep(5,20), verbose=FALSE,
#' colleges = "c1", students = "s1",
#' outcome = ~ c1:s1 + eta + nu,
#' selection = ~ -1 + c1:s1 + eta
#' )
#' head(xdata$OUT)
#'
#'
#' ## 2. Correction for sorting bias when match valuations V are observed
#'
#' ## 2-a. Bias from sorting
#' lm1 <- lm(y ~ c1:s1, data=xdata$OUT)
#' summary(lm1)
#'
#' ## 2-b. Cause of the bias
#' with(xdata$OUT, cor(c1*s1, eta))
#'
#' ## 2-c. Correction for sorting bias
#' lm2a <- lm(V ~ -1 + c1:s1, data=xdata$SEL); summary(lm2a)
#' etahat <- lm2a$residuals[xdata$SEL$D==1]
#'
#' lm2b <- lm(y ~ c1:s1 + etahat, data=xdata$OUT)
#' summary(lm2b)
#'
#'
#' ## 3. Correction for sorting bias when match valuations V are unobserved
#'
#' ## 3-a. Run Gibbs sampler (when SEL is given)
#' fit2 <- stabit2(OUT = xdata$OUT,
#' SEL = xdata$SEL,
#' outcome = y ~ c1:s1,
#' selection = ~ -1 + c1:s1,
#' niter=1000
#' )
#'
#' ## 3-b. Alternatively: Run Gibbs sampler (when SEL is not given)
#' fit2 <- stabit2(OUT = xdata$OUT,
#' colleges = "c1",
#' students = "s1",
#' outcome = y ~ c1:s1,
#' selection = ~ -1 + c1:s1,
#' niter=1000
#' )
#'
#'
#' ## 4. Implemented methods
#'
#' ## 4-a. Get coefficients
#' fit2
#'
#' ## 4-b. Coefficient table
#' summary(fit2)
#'
#' ## 4-c. Get marginal effects
#' summary(fit2, mfx=TRUE)
#'
#' ## 4-d. Also try the following functions
#' #coef(fit2)
#' #fitted(fit2)
#' #residuals(fit2)
#' #predict(fit2, newdata=NULL)
#'
#'
#' ## 5. Plot MCMC draws for coefficients
#' plot(fit2)
#' }
stabit2 <- function(OUT=NULL, SEL=NULL, colleges=NULL, students=NULL, outcome=NULL, selection,
binary=FALSE, niter, gPrior=FALSE,
censored=1, thin=1, nCores=max(1,detectCores()-1), verbose = FALSE, ...) UseMethod("stabit2")
#' @export
stabit2.default <- function(OUT=NULL, SEL=NULL, colleges=NULL, students=NULL, outcome=NULL, selection,
binary=FALSE, niter, gPrior=FALSE,
censored=1, thin=1, nCores=max(1,detectCores()-1), verbose = FALSE, ...){
## ------------------------
## --- 1. Preliminaries ---
## select method based on arguments provided
if(is.list(selection)){
method <- "Klein" # separate selection equations for students and college preferences
selection.college <- selection$college
selection.student <- selection$student
selection <- NULL
if(is.null(OUT)){
method <- "Klein-selection" # no outcome equation
OUT <- SEL$SELs[SEL$SELs$D==1,]
OUT$Y <- rep(0,nrow(OUT))
}
} else{
method <- "Sorensen" # single selection equation with equal sharing rule for student and college utility
}
s.prefs <- NULL
c.prefs <- NULL
if(!is.data.frame(SEL)){
SELs <- SEL$SELs
SELc <- SEL$SELc
s.prefs <- SEL$s.prefs
c.prefs <- SEL$c.prefs
SEL <- NULL
}
## split input data by market
OUT <- split(OUT, OUT$m.id)
if(!is.null(SEL)){
SEL <- split(SEL, SEL$m.id)
SELs <- NULL
SELc <- NULL
} else if(!is.null(SELs) & !is.null(SELc)){
SELs <- split(SELs, SELs$m.id)
SELc <- split(SELc, SELc$m.id)
}
## number of cores need not exceed number of markets
nCores <- min(length(SELc), nCores)
## market and agent identifiers
m.id <- "m.id"
c.id <- "c.id"
s.id <- "s.id"
## empty lists for results
Y=list(); Xmatch=list(); C=list(); Cmatch=list(); S=list(); Smatch=list(); D=list(); d=list()
M=list(); H=list(); indices=list()
c.better=list(); c.worse=list(); s.better=list(); s.worse=list()
c.betterNA=list(); c.worseNA=list(); s.betterNA=list(); s.worseNA=list()
for(i in 1:length(OUT)){
## ------------------------------------------------------------------------------------------
## --- 2. Bring data in consistent order and produce matrices for all equilibrium matches ---
X <- stabit2_inner(iter=i, OUT=OUT[[i]], SEL=SEL[[i]], SELs=SELs[[i]], SELc=SELc[[i]],
colleges=colleges, students=students, outcome=outcome, selection=selection,
selection.student=selection.student, selection.college=selection.college,
s.prefs=s.prefs[[i]], c.prefs=c.prefs[[i]],
method=method)
## continue to write the results per market
Y[[i]] <- as.matrix(X$Y)
Xmatch[[i]] <- as.matrix(X$Xmatch)
C[[i]] <- as.matrix(X$C)
Cmatch[[i]] <- as.matrix(X$Cmatch)
D[[i]] <- as.matrix(X$D)
d[[i]] <- as.matrix(X$d) - 1
M[[i]] <- as.matrix(X$M) - 1
indices[[i]] <- X$indices
if(method=="Klein" | method=="Klein-selection"){
S[[i]] <- as.matrix(X$S)
Smatch[[i]] <- as.matrix(X$Smatch)
H[[i]] <- X$H
} else{
H[[i]] <- as.matrix(X$H)
}
c.better[[i]] <- X$c.better
c.worse[[i]] <- X$c.worse
s.better[[i]] <- X$s.better
s.worse[[i]] <- X$s.worse
c.betterNA[[i]] <- X$c.betterNA
c.worseNA[[i]] <- X$c.worseNA
s.betterNA[[i]] <- X$s.betterNA
s.worseNA[[i]] <- X$s.worseNA
}
## some preliminaries
T <- length(Y); #// Number of markets.
nColleges <- nStudents <- XXmatch <- CC <- SS <- CCmatch <- SSmatch <- list()
L <- rep(list(vector()),T)
studentIds <- collegeId <- rep(list(vector()),T)
## further results
s <- 0
for(i in 1:T){
nColleges[[i]] <- nrow(H[[i]])
nStudents[[i]] <- ncol(H[[i]])
XXmatch[[i]] <- t(Xmatch[[i]]) %*% Xmatch[[i]]
CC[[i]] <- t(C[[i]]) %*% C[[i]]
CCmatch[[i]] <- t(Cmatch[[i]]) %*% Cmatch[[i]]
if(method=="Klein" | method=="Klein-selection"){
SSmatch[[i]] <- t(Smatch[[i]]) %*% Smatch[[i]]
SS[[i]] <- t(S[[i]]) %*% S[[i]]
#collegeId[[i]] <- matrix(NA, nrow=nStudents[[i]], ncol=dim(H[[i]])[3]) ##
}
## Record the id's of students matched to each college, and the id of the college matched to each student.
for(j in 1:nColleges[[i]]){
s <- s+1
L[[i]][j] <- s - 1
studentIds[[s]] <- which(H[[i]][j,] == 1) - 1
}
for(j in 1:nStudents[[i]]){
collegeId[[i]][j] <- which(H[[i]][,j] == 1) - 1
}
}
n <- sum(unlist(nStudents)) # total number of students/matches
N <- sum(unlist(nColleges)) # total number of colleges
## ----------------------------
## --- 3. Run Gibbs sampler ---
#save.image("~/Desktop/play.RData")
#load("~/Desktop/play.RData")
if(nCores > 1){
## split market ids (Ts) and Ns evenly to the number of cores (nCores)
Ts <- split(1:T, cut(1:T, breaks=nCores, labels=FALSE))
Ns <- list()
for(i in 1:length(Ts)){
if(i==1){
Ns[[i]] <- 1:sum(unlist(nColleges[ Ts[[i]] ]))
} else{
Ns[[i]] <- max(Ns[[i-1]]) + ( 1:sum(unlist(nColleges[ Ts[[i]] ])) )
L[ Ts[[i]] ] <- lapply(L[ Ts[[i]] ], function(z){
z - min(unlist(L[ Ts[[i]] ]))
})
}
}
## setup cluster
cl <- makeCluster(nCores)
clusterEvalQ(cl, library(matchingMarkets))
if(method == "Klein" & (!is.null(s.prefs) & !is.null(c.prefs)) ){
## split
parObject <- lapply(1:nCores, function(i){
list(Y=Y[Ts[[i]]], Xmatch=Xmatch[Ts[[i]]], C=C[Ts[[i]]], Cmatch=Cmatch[Ts[[i]]],
S=S[Ts[[i]]], Smatch=Smatch[Ts[[i]]], D=D[Ts[[i]]], d=d[Ts[[i]]], M=M[Ts[[i]]],
H=H[Ts[[i]]], nColleges=unlist(nColleges[Ts[[i]]]), nStudents=unlist(nStudents[Ts[[i]]]),
XXmatch=XXmatch[Ts[[i]]], CC=CC[Ts[[i]]], SS=SS[Ts[[i]]], SSmatch=SSmatch[Ts[[i]]],
CCmatch=CCmatch[Ts[[i]]], L=L[Ts[[i]]], studentIds=studentIds[Ns[[i]]],
collegeId=collegeId[Ts[[i]]], n=sum(unlist(nStudents[Ts[[i]]])), N=sum(unlist(nColleges[Ts[[i]]])),
binary=binary, niter=niter, thin=thin, T=length(Ts[[i]]), censored=censored,
c.better=c.better[Ts[[i]]], c.worse=c.worse[Ts[[i]]], s.better=s.better[Ts[[i]]], s.worse=s.worse[Ts[[i]]],
c.betterNA=c.betterNA[Ts[[i]]], c.worseNA=c.worseNA[Ts[[i]]], s.betterNA=s.betterNA[Ts[[i]]], s.worseNA=s.worseNA[Ts[[i]]])
})
if(verbose == TRUE){
cat(paste("Running parallel Gibbs sampler on", nCores, "cores...", sep=" "))
}
res <- parLapply(cl, parObject, function(d){
with(d, stabit2Sel0(Yr=Y, Xmatchr=Xmatch, Cr=C, Cmatchr=Cmatch,
Sr=S, Smatchr=Smatch, Dr=D, dr=d, Mr=M,
Hr=H, nCollegesr=nColleges, nStudentsr=nStudents,
XXmatchr=XXmatch, CCr=CC, SSr=SS, SSmatchr=SSmatch,
CCmatchr=CCmatch, Lr=L, studentIdsr=studentIds,
collegeIdr=collegeId, n=n, N=N,
binary=binary, niter=niter, thin=thin, T=T, censored=censored,
cbetterr=c.better, cworser=c.worse, sbetterr=s.better, sworser=s.worse,
cbetterNAr=c.betterNA, cworseNAr=c.worseNA, sbetterNAr=s.betterNA, sworseNAr=s.worseNA))
})
} else if(method == "Klein" & (is.null(s.prefs) | is.null(c.prefs)) ){
## split
parObject <- lapply(1:nCores, function(i){
list(Y=Y[Ts[[i]]], Xmatch=Xmatch[Ts[[i]]], C=C[Ts[[i]]], Cmatch=Cmatch[Ts[[i]]],
S=S[Ts[[i]]], Smatch=Smatch[Ts[[i]]], D=D[Ts[[i]]], d=d[Ts[[i]]], M=M[Ts[[i]]],
H=H[Ts[[i]]], nColleges=unlist(nColleges[Ts[[i]]]), nStudents=unlist(nStudents[Ts[[i]]]),
XXmatch=XXmatch[Ts[[i]]], CC=CC[Ts[[i]]], SS=SS[Ts[[i]]], SSmatch=SSmatch[Ts[[i]]],
CCmatch=CCmatch[Ts[[i]]], L=L[Ts[[i]]], studentIds=studentIds[Ns[[i]]],
collegeId=collegeId[Ts[[i]]], n=sum(unlist(nStudents[Ts[[i]]])), N=sum(unlist(nColleges[Ts[[i]]])),
binary=binary, niter=niter, thin=thin, T=length(Ts[[i]]), censored=censored)
})
if(verbose == TRUE){
cat(paste("Running parallel Gibbs sampler on", nCores, "cores...", sep=" "))
}
res <- parLapply(cl, parObject, function(d){
with(d, stabit2Sel1(Yr=Y, Xmatchr=Xmatch, Cr=C, Cmatchr=Cmatch,
Sr=S, Smatchr=Smatch, Dr=D, dr=d, Mr=M,
Hr=H, nCollegesr=nColleges, nStudentsr=nStudents,
XXmatchr=XXmatch, CCr=CC, SSr=SS, SSmatchr=SSmatch,
CCmatchr=CCmatch, Lr=L, studentIdsr=studentIds,
collegeIdr=collegeId, n=n, N=N,
binary=binary, niter=niter, thin=thin, T=T, censored=censored))
})
} else if(method == "Klein-selection" & (!is.null(s.prefs) & !is.null(c.prefs)) ){
## split
parObject <- lapply(1:nCores, function(i){
list(C=C[Ts[[i]]], Cmatch=Cmatch[Ts[[i]]], S=S[Ts[[i]]], Smatch=Smatch[Ts[[i]]], D=D[Ts[[i]]],
d=d[Ts[[i]]], M=M[Ts[[i]]], H=H[Ts[[i]]], nColleges=unlist(nColleges[Ts[[i]]]),
nStudents=unlist(nStudents[Ts[[i]]]), CC=CC[Ts[[i]]], SS=SS[Ts[[i]]],
SSmatch=SSmatch[Ts[[i]]], CCmatch=CCmatch[Ts[[i]]], L=L[Ts[[i]]],
studentIds=studentIds[Ns[[i]]], collegeId=collegeId[Ts[[i]]], n=sum(unlist(nStudents[Ts[[i]]])),
N=sum(unlist(nColleges[Ts[[i]]])), niter=niter, thin=thin, T=length(Ts[[i]]),
c.better=c.better[Ts[[i]]], c.worse=c.worse[Ts[[i]]], s.better=s.better[Ts[[i]]], s.worse=s.worse[Ts[[i]]],
c.betterNA=c.betterNA[Ts[[i]]], c.worseNA=c.worseNA[Ts[[i]]], s.betterNA=s.betterNA[Ts[[i]]], s.worseNA=s.worseNA[Ts[[i]]])
})
if(verbose == TRUE){
cat(paste("Running parallel Gibbs sampler on", nCores, "cores...", sep=" "))
}
res <- parLapply(cl, parObject, function(d){
with(d, stabit2Mat0(Cr=C, Cmatchr=Cmatch, Sr=S, Smatchr=Smatch, Dr=D,
dr=d, Mr=M, Hr=H, nCollegesr=nColleges,
nStudentsr=nStudents, CCr=CC, SSr=SS,
SSmatchr=SSmatch, CCmatchr=CCmatch, Lr=L,
studentIdsr=studentIds, collegeIdr=collegeId, n=n,
N=N, niter=niter, thin=thin, T=T,
cbetterr=c.better, cworser=c.worse, sbetterr=s.better, sworser=s.worse,
cbetterNAr=c.betterNA, cworseNAr=c.worseNA, sbetterNAr=s.betterNA, sworseNAr=s.worseNA))
})
} else if(method == "Klein-selection" & (is.null(s.prefs) | is.null(c.prefs)) ){
## split
parObject <- lapply(1:nCores, function(i){
list(C=C[Ts[[i]]], Cmatch=Cmatch[Ts[[i]]], S=S[Ts[[i]]], Smatch=Smatch[Ts[[i]]], D=D[Ts[[i]]],
d=d[Ts[[i]]], M=M[Ts[[i]]], H=H[Ts[[i]]], nColleges=unlist(nColleges[Ts[[i]]]),
nStudents=unlist(nStudents[Ts[[i]]]), CC=CC[Ts[[i]]], SS=SS[Ts[[i]]],
SSmatch=SSmatch[Ts[[i]]], CCmatch=CCmatch[Ts[[i]]], L=L[Ts[[i]]],
studentIds=studentIds[Ns[[i]]], collegeId=collegeId[Ts[[i]]], n=sum(unlist(nStudents[Ts[[i]]])),
N=sum(unlist(nColleges[Ts[[i]]])), niter=niter, thin=thin, T=length(Ts[[i]]))
})
if(verbose == TRUE){
cat(paste("Running parallel Gibbs sampler on", nCores, "cores...", sep=" "))
}
res <- parLapply(cl, parObject, function(d){
with(d, stabit2Mat1(Cr=C, Cmatchr=Cmatch, Sr=S, Smatchr=Smatch, Dr=D,
dr=d, Mr=M, Hr=H, nCollegesr=nColleges,
nStudentsr=nStudents, CCr=CC, SSr=SS,
SSmatchr=SSmatch, CCmatchr=CCmatch, Lr=L,
studentIdsr=studentIds, collegeIdr=collegeId, n=n,
N=N, niter=niter, thin=thin, T=T))
})
} else if(method == "Sorensen"){
## split
parObject <- lapply(1:nCores, function(i){
list(Y=Y[Ts[[i]]], Xmatch=Xmatch[Ts[[i]]], C=C[Ts[[i]]], Cmatch=Cmatch[Ts[[i]]],
D=D[Ts[[i]]], d=d[Ts[[i]]], M=M[Ts[[i]]], H=H[Ts[[i]]],
nColleges=unlist(nColleges[Ts[[i]]]), nStudents=unlist(nStudents[Ts[[i]]]),
XXmatch=XXmatch[Ts[[i]]], CC=CC[Ts[[i]]], CCmatch=CCmatch[Ts[[i]]], L=L[Ts[[i]]],
studentIds=studentIds[Ns[[i]]], collegeId=collegeId[Ts[[i]]],
n=sum(unlist(nStudents[Ts[[i]]])), N=sum(unlist(nColleges[Ts[[i]]])),
binary=binary, niter=niter, thin=thin, T=length(Ts[[i]]), censored=censored)
})
if(verbose == TRUE){
cat(paste("Running parallel Gibbs sampler on", nCores, "cores...", sep=" "))
}
res <- parLapply(cl, parObject, function(d){
with(d, stabit2Sel2(Yr=Y, Xmatchr=Xmatch, Cr=C, Cmatchr=Cmatch,
Dr=D, dr=d, Mr=M, Hr=H,
nCollegesr=nColleges, nStudentsr=nStudents,
XXmatchr=XXmatch, CCr=CC, CCmatchr=CCmatch, Lr=L,
studentIdsr=studentIds, collegeIdr=collegeId,
n=n, N=N,
binary=binary, niter=niter, thin=thin, T=T, censored=censored))
})
}
rm(parObject, Ts, Ns)
stopCluster(cl)
## split results into draws for paramters (res1) and error terms (res2)
res1 <- lapply(res, function(z) z[!names(z) %in% c("etadraws", "deltadraws")])
res2 <- lapply(res, function(z) z[names(z) %in% c("etadraws", "deltadraws")])
rm(res)
## combine results from different cores for error terms
est2 <- lapply(1:length(res2[[1]]), function(z){
do.call(rbind, lapply(res2, function(d) d[[z]] ))
})
names(est2) <- names(res2[[1]]); rm(res2)
## combine results from different cores for parameter draws
toarray <- function(x, y){
array(unlist(lapply(x, function(z) z[[y]] )),
dim = c(nrow(res1[[1]][[y]]), ncol(res1[[1]][[y]]), length(res1)) )
}
est1 <- lapply(1:length(res1[[1]]), function(z){
toarray( res1, names(res1[[1]])[z] )
})
names(est1) <- names(res1[[1]]); rm(res1)
est1 <- lapply(est1, consensusMC)
## combine results for paramters (est1) and error terms (est2)
est <- c(est1, est2); rm(est1, est2)
} else{
#Ts <- 1:T
#Ns <- 1:N
if(method=="Klein" & !is.null(s.prefs) & !is.null(c.prefs) ){
est <- stabit2Sel0(Yr=Y, Xmatchr=Xmatch, Cr=C, Cmatchr=Cmatch, Sr=S, Smatchr=Smatch, Dr=D, dr=d,
Mr=M, Hr=H, nCollegesr=unlist(nColleges), nStudentsr=unlist(nStudents), XXmatchr=XXmatch,
CCr=CC, SSr=SS, SSmatchr=SSmatch, CCmatchr=CCmatch, Lr=L,
studentIdsr=studentIds, collegeIdr=collegeId, n=n, N=N, binary=binary, niter=niter,
thin=thin, T=T, censored=censored,
cbetterr=c.better, cworser=c.worse, sbetterr=s.better, sworser=s.worse,
cbetterNAr=c.betterNA, cworseNAr=c.worseNA, sbetterNAr=s.betterNA, sworseNAr=s.worseNA)
} else if(method=="Klein" & (is.null(s.prefs) | is.null(c.prefs)) ){
est <- stabit2Sel1(Yr=Y, Xmatchr=Xmatch, Cr=C, Cmatchr=Cmatch, Sr=S, Smatchr=Smatch, Dr=D, dr=d,
Mr=M, Hr=H, nCollegesr=unlist(nColleges), nStudentsr=unlist(nStudents), XXmatchr=XXmatch,
CCr=CC, SSr=SS, SSmatchr=SSmatch, CCmatchr=CCmatch, Lr=L,
studentIdsr=studentIds, collegeIdr=collegeId, n=n, N=N, binary=binary, niter=niter,
thin=thin, T=T, censored=censored)
} else if(method=="Klein-selection" & !is.null(s.prefs) & !is.null(c.prefs) ){
est <- stabit2Mat0(Cr=C, Cmatchr=Cmatch, Sr=S, Smatchr=Smatch, Dr=D, dr=d,
Mr=M, Hr=H, nCollegesr=unlist(nColleges), nStudentsr=unlist(nStudents),
CCr=CC, SSr=SS, SSmatchr=SSmatch, CCmatchr=CCmatch, Lr=L,
studentIdsr=studentIds, collegeIdr=collegeId, n=n, N=N, niter=niter, thin=thin, T=T,
cbetterr=c.better, cworser=c.worse, sbetterr=s.better, sworser=s.worse,
cbetterNAr=c.betterNA, cworseNAr=c.worseNA, sbetterNAr=s.betterNA, sworseNAr=s.worseNA)
} else if(method=="Klein-selection" & (is.null(s.prefs) | is.null(c.prefs)) ){
est <- stabit2Mat1(Cr=C, Cmatchr=Cmatch, Sr=S, Smatchr=Smatch, Dr=D, dr=d,
Mr=M, Hr=H, nCollegesr=unlist(nColleges), nStudentsr=unlist(nStudents),
CCr=CC, SSr=SS, SSmatchr=SSmatch, CCmatchr=CCmatch, Lr=L,
studentIdsr=studentIds, collegeIdr=collegeId, n=n, N=N, niter=niter, thin=thin, T=T)
} else if(method=="Sorensen"){
est <- stabit2Sel2(Yr=Y, Xmatchr=Xmatch, Cr=C, Cmatchr=Cmatch, Dr=D, dr=d,
Mr=M, Hr=H, nCollegesr=unlist(nColleges), nStudentsr=unlist(nStudents), XXmatchr=XXmatch,
CCr=CC, CCmatchr=CCmatch, Lr=L,
studentIdsr=studentIds, collegeIdr=collegeId,
n=n, N=N, binary=binary, niter=niter, thin=thin, T=T, censored=censored)
}
}
# ------------------------------------
# --- 5. Add names to coefficients ---
## variable names
if(method!="Klein-selection"){
an <- colnames(Xmatch[[1]])
}
bn <- colnames(C[[1]])
if(method=="Klein" | method=="Klein-selection"){
cn <- colnames(S[[1]])
}
## parameter draws
rownames(est$betadraws) <- bn
if(method=="Sorensen"){
est$alphadraws <- with(est, rbind(alphadraws, kappadraws))
rownames(est$alphadraws) <- c(an,"kappa")
est$kappadraws <- NULL
} else if(method=="Klein"){
est$alphadraws <- with(est, rbind(alphadraws, kappadraws, lambdadraws))
rownames(est$alphadraws) <- c(an,"kappa","lambda")
est$kappadraws <- NULL
est$lambdadraws <- NULL
rownames(est$gammadraws) = cn
} else if(method=="Klein-selection"){
rownames(est$gammadraws) = cn
}
## posterior means
## The last half of all draws are used in approximating the posterior means and the posterior standard deviations.
niter <- dim(est$betadraws)[2]
startiter <- floor(niter/2)
posMeans <- function(x){
sapply(1:nrow(x), function(z) mean(x[z,startiter:niter]))
}
if(method!="Klein-selection"){
est$alpha <- posMeans(est$alphadraws)
names(est$alpha) <- rownames(est$alphadraws)
}
est$beta <- posMeans(est$betadraws)
names(est$beta) <- bn
if(method=="Klein" | method=="Klein-selection"){
est$gamma <- posMeans(est$gammadraws)
names(est$gamma) <- cn
}
if(method!="Klein-selection" & binary==FALSE){
est$sigma <- sqrt(posMeans(est$sigmasquarenudraws))
est$sigmasquarenudraws <- NULL
}
## error terms
est$eta <- posMeans(est$etadraws)
est$etadraws <- NULL
if(method=="Klein" | method=="Klein-selection"){
est$delta <- posMeans(est$deltadraws)
est$deltadraws <- NULL
}
## vcov
if(method!="Klein-selection"){
est$vcov$alpha <- var(t(est$alphadraws))
}
est$vcov$beta <- var(t(est$betadraws))
if(method=="Klein" | method=="Klein-selection"){
est$vcov$gamma <- var(t(est$gammadraws))
}
## variables
est$C <- do.call(rbind, C)
est$D <- do.call(c, D)
if(method!="Klein-selection"){
est$Y <- do.call(c, Y)
est$X <- do.call(rbind, Xmatch)
if(method=="Klein"){
est$X <- cbind(est$X, eta=est$eta, delta=est$delta)
est$S <- do.call(rbind, S)
} else if(method=="Sorensen"){
est$X <- cbind(est$X, eta=est$eta)
}
} else if(method=="Klein-selection"){
est$S <- do.call(rbind, S)
}
## other output
if(method=="Sorensen"){
est$coefficients <- unlist(with(est, list(o=alpha, s=beta)))
} else if(method=="Klein"){
est$coefficients <- unlist(with(est, list(o=alpha, c=beta, s=gamma)))
} else if(method=="Klein-selection"){
est$coefficients <- unlist(with(est, list(c=beta, s=gamma)))
}
if(method!="Klein-selection"){
est$fitted.values <- with(est, as.vector(X %*% alpha))
est$residuals <- est$Y - est$fitted.values
est$df <- n - ncol(est$X)
est$binary <- binary
est$formula <- outcome
} else{
#est$fitted.values <- with(est, as.vector(X %*% alpha))
#est$residuals <- est$Y - est$fitted.values
est$df <- n*N - ncol(est$C) - ncol(est$S)
#est$binary <- binary
#est$formula <- outcome
}
est$call <- match.call()
est$method <- method
## consolidate
h <- est[grep(pattern="draws",x=names(est))]
est[grep(pattern="draws",x=names(est))] <- NULL
est$draws <- h
h <- est[which(names(est) %in% c("alpha","beta","gamma","kappa","lambda"))]
est[which(names(est) %in% c("alpha","beta","gamma","kappa","lambda"))] <- NULL
est$coefs <- h
h <- est[which(names(est) %in% c("X","S","C","Y","D"))]
est[which(names(est) %in% c("X","S","C","Y","D"))] <- NULL
est$variables <- h
h <- NULL
# ------------------
# --- 6. Returns ---
class(est) <- "stabit2"
return(est)
}
rFormula <- function(formula, data=list(), ...){
mf <- model.frame(formula=formula, data=data)
x <- model.matrix(attr(mf, "terms"), data=mf)
y <- model.response(mf)
as.data.frame(cbind(y,x))
}
consensusMC <- function(subchain){
ddata = length(dim(subchain))
d <- dim(subchain)[1]
sampletotT <- dim(subchain)[2]
M <- dim(subchain)[3]
if ( M==1 ){
theta <- array(subchain[,,1],c(d,sampletotT))
return (theta)
}
## compute sigmahatm & sigmahatm.inverse (= W_s)
sigmahatm <- array(NA,c(d,d,M))
sigmahatM <- matrix(NA,d,d)
sigmahatM.pre <- matrix(NA,d,d)
if (d==1){
for (k in 1:M) { sigmahatm[1,,k] <- var(subchain[1,,k]) }
} else{
for (k in 1:M) { sigmahatm[,,k] <- cov(t(subchain[,,k])) }
}
## compute inverses of covariance matrices (with try()):
sigmahatm.inverse <- array(NA,dim=c(d,d,M))
for (k in 1:M){
res <- try( sigmahatm.inverse[,,k] <- solve(sigmahatm[,,k]), silent=TRUE)
}
sigmahatM <- solve(rowSums(sigmahatm.inverse, dims=2)) # (sum W_s)^(-1)
## compute unified posterior samples
theta <- matrix(NA,nrow=d,ncol=sampletotT) # the resulting posterior samples
wvec <- array(NA, c(d,1))
for (i in 1:sampletotT){
wvec <- rep(0,d)
for (s in 1:M){
wvec <- wvec + sigmahatm.inverse[,,s] %*% subchain[,i,s]
}
theta[,i] <- sigmahatM %*% wvec
}
return(theta)
}
stabit2_inner <- function(iter, OUT, SEL, SELs, SELc, colleges, students,
m.id="m.id", c.id="c.id", s.id="s.id",
outcome, selection, selection.student, selection.college,
s.prefs, c.prefs, method){
## ----------------------------------------------------------------------------------------------
## --- 1. Sort datasets by colleges (c.id) and students (s.id), put equilibrium matches first ---
if(!is.null(SEL)){
## sort SEL by c.id and s.id (based on OUT)
idOUT <- with(OUT, paste(c.id, s.id, sep="_"))
SEL$idSEL <- with(SEL, paste(c.id, s.id, sep="_"))
## which matches in SEL are in OUT?
SEL_top <- SEL[SEL$idSEL %in% idOUT,]
SEL_top <- SEL_top[match(idOUT, SEL_top$idSEL),]
SEL_bot <- SEL[! SEL$idSEL %in% idOUT,]
SEL <- rbind(SEL_top, SEL_bot)
rm(SEL_top, SEL_bot)
## unique student and college ids
indices <- SEL
uColleges <- sort(unique(indices$c.id))
uStudents <- sort(unique(indices$s.id))
} else if(!is.null(SELs) & !is.null(SELc)){
## sort SELs/SELc by c.id and s.id (based on OUT)
idOUT <- with(OUT, paste(c.id, s.id, sep="_"))
SELs$idSEL <- with(SELs, paste(c.id, s.id, sep="_"))
SELc$idSEL <- with(SELc, paste(c.id, s.id, sep="_"))
## which matches in SELs are in OUT?
SEL_top <- SELs[SELs$idSEL %in% idOUT,]
SEL_top <- SEL_top[match(idOUT, SEL_top$idSEL),]
SEL_bot <- SELs[! SELs$idSEL %in% idOUT,]
SELs <- rbind(SEL_top, SEL_bot)
rm(SEL_top, SEL_bot)
## which matches in SELc are in OUT?
SEL_top <- SELc[SELc$idSEL %in% idOUT,]
SEL_top <- SEL_top[match(idOUT, SEL_top$idSEL),]
SEL_bot <- SELc[! SELc$idSEL %in% idOUT,]
SELc <- rbind(SEL_top, SEL_bot)
rm(SEL_top, SEL_bot)
## unique student and college ids
indices <- SELs
uColleges <- sort(unique(indices$c.id))
uStudents <- sort(unique(indices$s.id))
} else{
## get college and student ids from OUT
c.id <- OUT[,c.id]
s.id <- OUT[,s.id]
## unique student and college ids
uColleges <- sort(unique(c.id))
uStudents <- sort(unique(s.id))
## all feasible combinations
combs <- function(uColleges, uStudents){
nColleges <- length(uColleges)
nStudents <- length(uStudents)
data.frame( c.id = c(sapply(uColleges, function(i){ rep(i, nStudents) })),
s.id = rep(uStudents, nColleges),
stringsAsFactors=FALSE )
}
indices <- as.data.frame(combs(uColleges, uStudents))
## index equilibrium matches from x in indices
indices$id <- paste(indices$c.id, indices$s.id, sep="_")
OUT$id <- paste(OUT$c.id, OUT$s.id, sep="_")
## sort indices such that observed matches come first
indices$D <- ifelse(indices$id %in% OUT$id, 1, 0)
indices <- indices[order(indices$D, decreasing=TRUE),]
## sort OUT such that it concordes with indices
OUT <- OUT[match(indices$id[indices$D==1], OUT$id), ]
}
## -----------------------------------------------------------------------------------------
## --- 2. Produce matrices indicating the matches (H) and their position in the data (M) ---
## matrices are of dimension nColleges x nStudents
ind.new <- indices
ind.new$index <- 1:nrow(ind.new)
ind.org <- ind.new[order(ind.new$c.id,ind.new$s.id),]
M <- matrix(ind.org$index, nrow=length(uColleges), ncol=length(uStudents), byrow=TRUE)
H <- matrix(ind.org$D, nrow=length(uColleges), ncol=length(uStudents), byrow=TRUE)
## -----------------------------------------------------------------------------------------
## --- 3. Create upper/lower bounds on latent match valuations based on rank order lists ---
if(!is.null(s.prefs) & !is.null(c.prefs)){
cprefs_better <- rbind(NA, c.prefs[-dim(c.prefs)[1],])
cprefs_worse <- rbind(c.prefs[-1,], NA)
sprefs_better <- rbind(NA, s.prefs[-dim(s.prefs)[1],])
sprefs_worse <- rbind(s.prefs[-1,], NA)
mapper <- function(clmn, direction, perspective){
## clmn : column in rank order list (cprefs/sprefs)
## direction : worse or better
## perspective: student or college perspective
if(perspective == "college"){
## creates lookup matrix c.better (c.worse) of dimension CxS such that cell (c,s) gives
## student that college c ranks just above (below) student s.
if(direction == "worse"){
h <- merge(x=data.frame(x=1:max(uStudents)),
y=data.frame(cbind(x=c.prefs[,clmn], y=cprefs_worse[,clmn])),
by="x", all.x=TRUE); h[order(h$x),"y"]
} else{ ## "better"
h <- merge(x=data.frame(x=1:max(uStudents)),
y=data.frame(cbind(x=c.prefs[,clmn], y=cprefs_better[,clmn])),
by="x", all.x=TRUE); h[order(h$x),"y"]
} ## end direction
} else{ ## "student"
## creates lookup matrix s.better (s.worse) of dimension CxS such that cell (c,s) gives
## college that student s ranks just above (below) college c.
if(direction == "worse"){
h <- merge(x=data.frame(x=1:max(uColleges)),
y=data.frame(cbind(x=s.prefs[,clmn], y=sprefs_worse[,clmn])),
by="x", all.x=TRUE); h[order(h$x),"y"]
} else{ ## "better"
h <- merge(x=data.frame(x=1:max(uColleges)),
y=data.frame(cbind(x=s.prefs[,clmn], y=sprefs_better[,clmn])),
by="x", all.x=TRUE); h[order(h$x),"y"]
} ## end direction
} ## end perspective
}
c.worse <- t(sapply(1:ncol(c.prefs), function(x) mapper(clmn=x, direction="worse", perspective="college"))) -1
c.better <- t(sapply(1:ncol(c.prefs), function(x) mapper(clmn=x, direction="better", perspective="college"))) -1
c.worseNA <- c.worse; c.worseNA[!is.na(c.worseNA)] <- 0; c.worseNA[is.na(c.worseNA)] <- 1
c.betterNA <- c.better; c.betterNA[!is.na(c.betterNA)] <- 0; c.betterNA[is.na(c.betterNA)] <- 1
c.worse[is.na(c.worse)] <- 0
c.better[is.na(c.better)] <- 0
s.worse <- sapply(1:ncol(s.prefs), function(x) mapper(clmn=x, direction="worse", perspective="student")) -1
s.better <- sapply(1:ncol(s.prefs), function(x) mapper(clmn=x, direction="better", perspective="student")) -1
s.worseNA <- s.worse; s.worseNA[!is.na(s.worseNA)] <- 0; s.worseNA[is.na(s.worseNA)] <- 1
s.betterNA <- s.better; s.betterNA[!is.na(s.betterNA)] <- 0; s.betterNA[is.na(s.betterNA)] <- 1
s.worse[is.na(s.worse)] <- 0
s.better[is.na(s.better)] <- 0
} else{
s.better <- NULL; c.better <- NULL; s.worse <- NULL; c.worse <- NULL
s.betterNA <- NULL; c.betterNA <- NULL; s.worseNA <- NULL; c.worseNA <- NULL
}
## ---------------------------------------------------------------------
## --- 4. Produce the datasets to be used based on formulas provided ---
if(is.null(SEL) & is.null(SELs) & is.null(SELc)){
c.OUT <- OUT[!duplicated(OUT$c.id),]
C <- data.frame(apply(data.frame(c.OUT[,colleges]), 2, function(i) i[match(indices$c.id,c.OUT$c.id)]))
names(C) <- colleges
s.OUT <- OUT[!duplicated(OUT$s.id),]
S <- data.frame(apply(data.frame(s.OUT[,students]), 2, function(i) i[match(indices$s.id,s.OUT$s.id)]))
names(S) <- students
## ... and interaction effects
Xmain <- data.frame(C, S)
Xmatch <- rFormula(formula = outcome, data=OUT)
if(method=="Klein"){
C <- rFormula(formula = selection.college, data=Xmain)
S <- rFormula(formula = selection.student, data=Xmain)
} else if(method == "Sorensen"){
C <- rFormula(formula = selection, data=Xmain)
}
} else{
if(method!="Klein-selection"){
Xmatch <- rFormula(formula = outcome, data=OUT)
} else{
Xmatch <- rFormula(formula = selection.student, data=OUT)
}
if(method=="Klein" | method=="Klein-selection"){
C <- rFormula(formula = selection.college, data=SELc)
S <- rFormula(formula = selection.student, data=SELs)
} else if(method == "Sorensen"){
C <- rFormula(formula = selection, data=SEL)
}
}
D <- indices$D
d <- which(D==1)
## -----------------------------
## --- 5. Return the results ---
if(method=="Klein" | method=="Klein-selection"){
return( list(Y=Xmatch[,1], Xmatch=Xmatch[,-1], C=C, Cmatch=C[D==1,], S=S, Smatch=S[D==1,],
D=D, d=d, M=M, H=H, indices=indices$id,
c.better=c.better, c.worse=c.worse, s.better=s.better, s.worse=s.worse,
s.betterNA=s.betterNA, c.betterNA=c.betterNA, s.worseNA=s.worseNA, c.worseNA=c.worseNA) )
} else if(method=="Sorensen"){
return( list(Y=Xmatch[,1], Xmatch=Xmatch[,-1], C=C, Cmatch=C[D==1,],
D=D, d=d, M=M, H=H, indices=indices$id) )
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.