R/stabit2.R

Defines functions stabit2 stabit2.default rFormula consensusMC stabit2_inner

Documented in stabit2

# ----------------------------------------------------------------------------
# R-code (www.r-project.org/) for the Structural Matching Model
#
# Copyright (c) 2016 Thilo Klein
#
# This library is distributed under the terms of the GNU Public License (GPL)
# for full details see the file LICENSE
#
# ----------------------------------------------------------------------------
#' @title Matching model and selection correction for college admissions
#'
#' @description The function provides a Gibbs sampler for a structural matching model that 
#' estimates preferences and corrects for sample selection bias when the selection process 
#' is a two-sided matching game; i.e., a matching of students to colleges.
#'
#' The structural model consists of a selection and an outcome equation. The \emph{Selection Equation} 
#' determines which matches are observed (\eqn{D=1}) and which are not (\eqn{D=0}).
#' \deqn{ \begin{array}{lcl}
#'        D &= & 1[V \in \Gamma] \\
#'        V &= & W\beta + \eta
#'        \end{array}
#'      }{ D = 1[V in \Gamma] with V = W\beta + \eta
#'      }
#' Here, \eqn{V} is a vector of latent valuations of \emph{all feasible} matches, ie observed and 
#' unobserved, and \eqn{1[.]} is the Iverson bracket. 
#' A match is observed if its match valuation is in the set of valuations \eqn{\Gamma}
#' that satisfy the equilibrium condition (see Sorensen, 2007). 
#' The match valuation \eqn{V} is a linear function of \eqn{W}, a matrix of characteristics for 
#' \emph{all feasible} matches, and \eqn{\eta}, a vector of random errors. \eqn{\beta} is a paramter 
#' vector to be estimated.
#' 
#' The \emph{Outcome Equation} determines the outcome for \emph{observed} matches. The dependent
#' variable can either be continuous or binary, dependent on the value of the \code{binary}
#' argument. In the binary case, the dependent variable \eqn{R} is determined by a threshold 
#' rule for the latent variable \eqn{Y}.
#' \deqn{ \begin{array}{lcl}
#'        R &= & 1[Y > c] \\
#'        Y &= & X\alpha + \epsilon
#'        \end{array}
#'      }{ R = 1[Y > c] with Y = X\alpha + \epsilon
#'      }
#' Here, \eqn{Y} is a linear function of \eqn{X}, a matrix of characteristics for \emph{observed} 
#' matches, and \eqn{\epsilon}, a vector of random errors. \eqn{\alpha} is a paramter vector to 
#' be estimated.
#' 
#' The structural model imposes a linear relationship between the error terms of both equations 
#' as \eqn{\epsilon = \kappa\eta + \nu}, where \eqn{\nu} is a vector of random errors and \eqn{\kappa}
#' is the covariance paramter to be estimated. If \eqn{\kappa} were zero, the marginal distributions
#' of \eqn{\epsilon} and \eqn{\eta} would be independent and the selection problem would vanish.
#' That is, the observed outcomes would be a random sample from the population of interest.
#' 
#' @param OUT data frame with characteristics of all observed matches, including
#' market identifier \code{m.id}, college identifier \code{c.id} and student identifier \code{s.id}.
#' @param SEL optional: data frame with characteristics of all observed and unobserved matches, including 
#' market identifier \code{m.id}, college identifier \code{c.id} and student identifier \code{s.id}.
#' @param colleges character vector of variable names for college characteristics. These variables carry the same value for any college.
#' @param students character vector of variable names for student characteristics. These variables carry the same value for any student.
#' @param outcome formula for match outcomes.
#' @param selection formula for match valuations. 
#' @param binary logical: if \code{TRUE} outcome variable is taken to be binary; if \code{FALSE} outcome variable is taken to be continuous.
#' @param niter number of iterations to use for the Gibbs sampler.
#' @param gPrior logical: if \code{TRUE} the g-prior (Zellner, 1986) is used for the variance-covariance matrix. (Not yet implemented)
#' @param censored draws of the \code{kappa} parameter that estimates the covariation between the error terms in selection and outcome equation are 0:not censored, 1:censored from below, 2:censored from above.
#' @param thin integer indicating the level of thinning in the MCMC draws. The default \code{thin=1} saves every draw, \code{thin=2} every second, etc.
#' @param nCores number of cores to be used in parallel Gibbs sampling.
#' @param ... .
#' 
# @param selection.college formula for match valuations of colleges. Is ignored when \code{selection} is provided.
# @param selection.student formula for match valuations of students. Is ignored when \code{selection} is provided.
# @param s.prefs list of matrices (one element for each market) of dimension \code{nColleges} \code{x} \code{nStudents} with the \code{j}th 
# column containing student \code{j}'s ranking over colleges in decreasing order of 
# preference (i.e. most preferred first).
# @param c.prefs list of matrices (one element for each market) of dimension \code{nStudents} \code{x} \code{nColleges} with the \code{i}th 
# column containing college \code{i}'s ranking over students in decreasing order of 
# preference (i.e. most preferred first).
# @param SELs optional: same as \code{SEL} but for student valuation when estimating separate selection equations for college and student preferences. Is ignored when \code{SEL} is provided.
# @param SELc optional: same as \code{SEL} but for college valuation when estimating separate selection equations for college and student preferences. Is ignored when \code{SEL} is provided.
#' 
#' @export
#' 
#' @useDynLib matchingMarkets, .registration = TRUE 
#' 
#' @import stats lattice parallel RcppProgress
#' @importFrom Rcpp evalCpp
#' @importFrom graphics par plot
#' 
#' @author Thilo Klein 
#' 
#' @keywords regression
#' 
#' @references Sorensen, M. (2007). How Smart is Smart Money? A Two-Sided Matching Model of Venture Capital.
#' \emph{Journal of Finance}, 62 (6): 2725-2762.
#' 
#' @examples
#' \dontrun{
#' ## --- SIMULATED EXAMPLE ---
#' 
#' ## 1. Simulate two-sided matching data for 20 markets (m=20) with 100 students
#' ##    (nStudents=100) per market and 20 colleges with quotas of 5 students, each
#' ##    (nSlots=rep(5,20)). True parameters in selection and outcome equations are 
#' ##    all equal to 1.
#' 
#' xdata <- stabsim2(m=20, nStudents=100, nSlots=rep(5,20), verbose=FALSE,
#'   colleges = "c1", students = "s1",
#'   outcome = ~ c1:s1 + eta + nu,
#'   selection = ~ -1 + c1:s1 + eta
#' )
#' head(xdata$OUT)
#' 
#' 
#' ## 2. Correction for sorting bias when match valuations V are observed
#' 
#' ## 2-a. Bias from sorting
#'  lm1 <- lm(y ~ c1:s1, data=xdata$OUT)
#'  summary(lm1)
#' 
#' ## 2-b. Cause of the bias
#'  with(xdata$OUT, cor(c1*s1, eta))
#' 
#' ## 2-c. Correction for sorting bias
#'  lm2a <- lm(V ~ -1 + c1:s1, data=xdata$SEL); summary(lm2a)
#'  etahat <- lm2a$residuals[xdata$SEL$D==1]
#'  
#'  lm2b <- lm(y ~ c1:s1 + etahat, data=xdata$OUT)
#'  summary(lm2b)
#' 
#' 
#' ## 3. Correction for sorting bias when match valuations V are unobserved
#' 
#' ## 3-a. Run Gibbs sampler (when SEL is given)
#'  fit2 <- stabit2(OUT = xdata$OUT, 
#'            SEL = xdata$SEL,
#'            outcome = y ~ c1:s1, 
#'            selection = ~ -1 + c1:s1,
#'            niter=1000
#'  )
#'
#' ## 3-b. Alternatively: Run Gibbs sampler (when SEL is not given)
#'  fit2 <- stabit2(OUT = xdata$OUT, 
#'            colleges = "c1",
#'            students = "s1",
#'            outcome = y ~ c1:s1, 
#'            selection = ~ -1 + c1:s1,
#'            niter=1000
#'  )
#'
#'
#' ## 4. Implemented methods
#'
#' ## 4-a. Get coefficients
#'  fit2
#'  
#' ## 4-b. Coefficient table
#'  summary(fit2)
#'  
#' ## 4-c. Get marginal effects
#'  summary(fit2, mfx=TRUE)
#'  
#' ## 4-d. Also try the following functions
#'  #coef(fit2)
#'  #fitted(fit2)
#'  #residuals(fit2)
#'  #predict(fit2, newdata=NULL)
#'
#'    
#' ## 5. Plot MCMC draws for coefficients
#'  plot(fit2)
#' }
stabit2 <- function(OUT=NULL, SEL=NULL, colleges=NULL, students=NULL, outcome=NULL, selection,
                    binary=FALSE, niter, gPrior=FALSE, 
                    censored=1, thin=1, nCores=max(1,detectCores()-1), ...) UseMethod("stabit2")


#' @export
stabit2.default <- function(OUT=NULL, SEL=NULL, colleges=NULL, students=NULL, outcome=NULL, selection,
                            binary=FALSE, niter, gPrior=FALSE, 
                            censored=1, thin=1, nCores=max(1,detectCores()-1), ...){
  
  ## ------------------------
  ## --- 1. Preliminaries ---
  
  ## select method based on arguments provided
  if(is.list(selection)){
    method <- "Klein" # separate selection equations for students and college preferences
    selection.college <- selection$college
    selection.student <- selection$student
    selection <- NULL
    if(is.null(OUT)){
      method <- "Klein-selection" # no outcome equation
      OUT <- SEL$SELs[SEL$SELs$D==1,]
      OUT$Y <- rep(0,nrow(OUT))
    }
  } else{
    method <- "Sorensen" # single selection equation with equal sharing rule for student and college utility
  }
  
  if(!is.data.frame(SEL)){
    SELs <- SEL$SELs
    SELc <- SEL$SELc
    s.prefs <- SEL$s.prefs
    c.prefs <- SEL$c.prefs
    SEL <- NULL
  }
  
  ## split input data by market
  OUT <- split(OUT, OUT$m.id)
  if(!is.null(SEL)){
    SEL <- split(SEL, SEL$m.id) 
    SELs <- NULL
    SELc <- NULL
  } else if(!is.null(SELs) & !is.null(SELc)){
    SELs <- split(SELs, SELs$m.id) 
    SELc <- split(SELc, SELc$m.id) 
  }
  
  ## number of cores need not exceed number of markets
  nCores <- min(length(SELc), nCores)
  
  ## market and agent identifiers
  m.id <- "m.id"
  c.id <- "c.id"
  s.id <- "s.id"
  
  ## empty lists for results
  Y=list(); Xmatch=list(); C=list(); Cmatch=list(); S=list(); Smatch=list(); D=list(); d=list()
  M=list(); H=list(); H2=list(); copt.id=vector(); indices=list()
  
  for(i in 1:length(OUT)){
    
    ## ------------------------------------------------------------------------------------------
    ## --- 2. Bring data in consistent order and produce matrices for all equilibrium matches ---
    
    X <- stabit2_inner(iter=i, OUT=OUT[[i]], SEL=SEL[[i]], SELs=SELs[[i]], SELc=SELc[[i]],
                       colleges=colleges, students=students, outcome=outcome, selection=selection,
                       selection.student=selection.student, selection.college=selection.college, 
                       s.prefs=s.prefs, c.prefs=c.prefs,
                       method=method)
    
    ## continue to write the results per market
    Y[[i]]      <- as.matrix(X$Y)
    Xmatch[[i]] <- as.matrix(X$Xmatch)
    C[[i]]      <- as.matrix(X$C)
    Cmatch[[i]] <- as.matrix(X$Cmatch)
    D[[i]]      <- as.matrix(X$D)
    d[[i]]      <- as.matrix(X$d) - 1
    M[[i]]      <- as.matrix(X$M) - 1
    indices[[i]] <- X$indices
    if(method=="Klein" | method=="Klein-selection"){
      S[[i]]      <- as.matrix(X$S)
      Smatch[[i]] <- as.matrix(X$Smatch)
      H[[i]]      <- X$H
      H2[[i]]     <- X$H2
      copt.id[i]  <- X$copt.id
    } else{
      H[[i]]      <- as.matrix(X$H)
    }
  }
  
  ## some preliminaries
  T <- length(Y); #// Number of markets.
  nColleges <- nStudents <- XXmatch <- CC <- SS <- CCmatch <- SSmatch <- list()
  L <- rep(list(vector()),T)
  if(method=="Klein" | method=="Klein-selection"){
    studentIds <- collegeId <- rep(list(matrix()),T)
  } else{
    studentIds <- collegeId <- rep(list(vector()),T)
  }
  
  ## further results
  s <- 0
  for(i in 1:T){
    if(method=="Klein" | method=="Klein-selection"){
      nColleges[[i]] <- nrow(H[[i]][,,1])
      nStudents[[i]] <- ncol(H[[i]][,,1])
    } else{
      nColleges[[i]] <- nrow(H[[i]])
      nStudents[[i]] <- ncol(H[[i]])
    }
    XXmatch[[i]]   <- t(Xmatch[[i]]) %*% Xmatch[[i]]
    CC[[i]]        <- t(C[[i]]) %*% C[[i]]
    CCmatch[[i]]   <- t(Cmatch[[i]]) %*% Cmatch[[i]]
    if(method=="Klein" | method=="Klein-selection"){
      SSmatch[[i]]   <- t(Smatch[[i]]) %*% Smatch[[i]]
      SS[[i]]        <- t(S[[i]]) %*% S[[i]]
      collegeId[[i]] <- matrix(NA, nrow=nStudents[[i]], ncol=dim(H[[i]])[3]) ##
    }
    
    ## Record the id's of students matched to each college, and the id of the college matched to each student.
    for(j in 1:nColleges[[i]]){
      s <- s+1
      L[[i]][j] <- s - 1
      if(method=="Klein" | method=="Klein-selection"){
        studentIds[[s]] <- matrix(NA, nrow=length(which(H[[i]][j,,1] == 1)), ncol=dim(H[[i]])[3])
        for(k in 1:dim(H[[i]])[3]){
          studentIds[[s]][,k] <- which(H[[i]][j,,k] == 1) - 1
        }
      } else{
        studentIds[[s]] <- which(H[[i]][j,] == 1) - 1
      }      
    }
    
    for(j in 1:nStudents[[i]]){
      if(method=="Klein" | method=="Klein-selection"){
        for(k in 1:dim(H[[i]])[3]){
          collegeId[[i]][j,k] <- which(H[[i]][,j,k] == 1) - 1
        }
      } else{
        collegeId[[i]][j] <- which(H[[i]][,j] == 1) - 1
      }      
    }
  }
  n <- sum(unlist(nStudents)) # total number of students/matches
  N <- sum(unlist(nColleges)) # total number of colleges
  nEquilibs <- unlist(lapply(H, function(z) dim(z)[3])) # number of equilibria
  
  ## ---------------------------------------------------------------------------------------------
  ## --- 3. Mapping of hospital/residents problem (HR) to related stable marriage problem (SM) ---
  
  if(method=="Klein" | method=="Klein-selection"){
    
    sopt.id <- 1 ## student-optimal matching is first in lists
    
    sopt2equ <- list()
    for(i in 1:length(H2)){ ## for each market i
      h <- apply(H2[[i]][,,sopt.id], 2, function(z) which(z==1))
      sopt2equ[[i]] <- -1 + sapply(1:dim(H2[[i]])[3], function(z){
        sapply(1:length(h), function(j){
          which(H2[[i]][h[j],,z]==1)
        })
      }) 
    }
    
    copt2equ <- list() ##!!!
    for(i in 1:length(H2)){
      h <- apply(H2[[i]][,,copt.id[i]], 2, function(z) which(z==1))
      copt2equ[[i]] <- -1 + sapply(1:dim(H2[[i]])[3], function(z){
        sapply(1:length(h), function(j){
          which(H2[[i]][h[j],,z]==1)
        })
      }) 
    }
    
    equ2sopt <- list()
    for(i in 1:length(H2)){
      h <- sapply(1:dim(H2[[i]])[3], function(z){
        apply(H2[[i]][,,z], 2, function(j){
          which(j==1)
        })
      })
      equ2sopt[[i]] <- -1 + sapply(1:dim(H2[[i]])[3], function(z){
        sapply(1:nrow(h), function(j){
          which(H2[[i]][h[j,z],,sopt.id]==1)
        })
      }) 
    }
    
    equ2copt <- list() ##!!!
    for(i in 1:length(H2)){
      h <- sapply(1:dim(H2[[i]])[3], function(z){
        apply(H2[[i]][,,z], 2, function(j){
          which(j==1)
        })
      })
      equ2copt[[i]] <- -1 + sapply(1:dim(H2[[i]])[3], function(z){
        sapply(1:nrow(h), function(j){
          which(H2[[i]][h[j,z],,copt.id[i]]==1)
        })
      }) 
    }
  }
  
  ## drop unused objects
  rm(H2)
  
  ## adjust index for college-optimal matching for C++
  copt.id <- copt.id -1
  
  ## ----------------------------
  ## --- 4. Run Gibbs sampler ---
  
  #save.image("~/Desktop/play.RData")
  #load("~/Desktop/play.RData")
  
  if(nCores > 1){
    
    ## split market ids (Ts) and Ns evenly to the number of cores (nCores)
    Ts <- split(1:T, cut(1:T, breaks=nCores, labels=FALSE))
    Ns <- list()
    for(i in 1:length(Ts)){
      if(i==1){
        Ns[[i]] <- 1:sum(unlist(nColleges[ Ts[[i]] ]))
      } else{
        Ns[[i]] <- max(Ns[[i-1]]) + ( 1:sum(unlist(nColleges[ Ts[[i]] ])) )
        L[ Ts[[i]] ] <- lapply(L[ Ts[[i]] ], function(z){
          z - min(unlist(L[ Ts[[i]] ])) 
        })
      }
    }  
    
    ## setup cluster
    cl <- makeCluster(nCores) 
    clusterEvalQ(cl, library(matchingMarkets))
    
    if(method == "Klein"){
      
      ## to avoid 
      parObject <- lapply(1:nCores, function(i){
        list(Y=Y[Ts[[i]]], Xmatch=Xmatch[Ts[[i]]], C=C[Ts[[i]]], Cmatch=Cmatch[Ts[[i]]], 
        S=S[Ts[[i]]], Smatch=Smatch[Ts[[i]]], D=D[Ts[[i]]], d=d[Ts[[i]]], M=M[Ts[[i]]], 
        H=H[Ts[[i]]], nColleges=unlist(nColleges[Ts[[i]]]), nStudents=unlist(nStudents[Ts[[i]]]), 
        XXmatch=XXmatch[Ts[[i]]], CC=CC[Ts[[i]]], SS=SS[Ts[[i]]], SSmatch=SSmatch[Ts[[i]]], 
        CCmatch=CCmatch[Ts[[i]]], L=L[Ts[[i]]], studentIds=studentIds[Ns[[i]]], 
        collegeId=collegeId[Ts[[i]]], nEquilibs=nEquilibs[Ts[[i]]], equ2sopt=equ2sopt[Ts[[i]]], 
        sopt2equ=sopt2equ[Ts[[i]]], equ2copt=equ2copt[Ts[[i]]], copt2equ=copt2equ[Ts[[i]]], 
        coptid=copt.id[Ts[[i]]], n=sum(unlist(nStudents[Ts[[i]]])), N=sum(unlist(nColleges[Ts[[i]]])), 
        binary=binary, niter=niter, thin=thin, T=length(Ts[[i]]), censored=censored)
      })
      
      cat(paste("Running parallel Gibbs sampler on", nCores, "cores...", sep=" "))
      res <- parLapply(cl, parObject, function(d){
        
        with(d, stabit2Sel1(Yr=Y, Xmatchr=Xmatch, Cr=C, Cmatchr=Cmatch, 
                            Sr=S, Smatchr=Smatch, Dr=D, dr=d, Mr=M, 
                            Hr=H, nCollegesr=nColleges, nStudentsr=nStudents, 
                            XXmatchr=XXmatch, CCr=CC, SSr=SS, SSmatchr=SSmatch, 
                            CCmatchr=CCmatch, Lr=L, studentIdsr=studentIds, 
                            collegeIdr=collegeId, nEquilibsr=nEquilibs, equ2soptr=equ2sopt, 
                            sopt2equr=sopt2equ, equ2coptr=equ2copt, copt2equr=copt2equ, 
                            coptidr=coptid, n=n, N=N, 
                            binary=binary, niter=niter, thin=thin, T=T, censored=censored))
      })
      
    } else if(method == "Klein-selection"){
      
      ## to avoid 
      parObject <- lapply(1:nCores, function(i){
        list(C=C[Ts[[i]]], Cmatch=Cmatch[Ts[[i]]], S=S[Ts[[i]]], Smatch=Smatch[Ts[[i]]], D=D[Ts[[i]]], 
             d=d[Ts[[i]]], M=M[Ts[[i]]], H=H[Ts[[i]]], nColleges=unlist(nColleges[Ts[[i]]]), 
             nStudents=unlist(nStudents[Ts[[i]]]), CC=CC[Ts[[i]]], SS=SS[Ts[[i]]], 
             SSmatch=SSmatch[Ts[[i]]], CCmatch=CCmatch[Ts[[i]]], L=L[Ts[[i]]], 
             studentIds=studentIds[Ns[[i]]], collegeId=collegeId[Ts[[i]]], nEquilibs=nEquilibs[Ts[[i]]],
             equ2sopt=equ2sopt[Ts[[i]]], sopt2equ=sopt2equ[Ts[[i]]], equ2copt=equ2copt[Ts[[i]]], 
             copt2equ=copt2equ[Ts[[i]]], coptid=copt.id[Ts[[i]]], n=sum(unlist(nStudents[Ts[[i]]])), 
             N=sum(unlist(nColleges[Ts[[i]]])), niter=niter, thin=thin, T=length(Ts[[i]]))
      })
      
      cat(paste("Running parallel Gibbs sampler on", nCores, "cores...", sep=" "))
      res <- parLapply(cl, parObject, function(d){  
        
        with(d, stabit2Mat1(Cr=C, Cmatchr=Cmatch, Sr=S, Smatchr=Smatch, Dr=D, 
                            dr=d, Mr=M, Hr=H, nCollegesr=nColleges, 
                            nStudentsr=nStudents, CCr=CC, SSr=SS, 
                            SSmatchr=SSmatch, CCmatchr=CCmatch, Lr=L, 
                            studentIdsr=studentIds, collegeIdr=collegeId, nEquilibsr=nEquilibs,
                            equ2soptr=equ2sopt, sopt2equr=sopt2equ, equ2coptr=equ2copt, 
                            copt2equr=copt2equ, coptidr=coptid, n=n, 
                            N=N, niter=niter, thin=thin, T=T))
      })
      
    } else if(method == "Sorensen"){

      ## to avoid 
      parObject <- lapply(1:nCores, function(i){
        list(Y=Y[Ts[[i]]], Xmatch=Xmatch[Ts[[i]]], C=C[Ts[[i]]], Cmatch=Cmatch[Ts[[i]]], 
             D=D[Ts[[i]]], d=d[Ts[[i]]], M=M[Ts[[i]]], H=H[Ts[[i]]], 
             nColleges=unlist(nColleges[Ts[[i]]]), nStudents=unlist(nStudents[Ts[[i]]]), 
             XXmatch=XXmatch[Ts[[i]]], CC=CC[Ts[[i]]], CCmatch=CCmatch[Ts[[i]]], L=L[Ts[[i]]], 
             studentIds=studentIds[Ns[[i]]], collegeId=collegeId[Ts[[i]]], 
             n=sum(unlist(nStudents[Ts[[i]]])), N=sum(unlist(nColleges[Ts[[i]]])), 
             binary=binary, niter=niter, thin=thin, T=length(Ts[[i]]), censored=censored)
      })
      
      cat(paste("Running parallel Gibbs sampler on", nCores, "cores...", sep=" "))
      res <- parLapply(cl, parObject, function(d){  
        
        with(d, stabit2Sel2(Yr=Y, Xmatchr=Xmatch, Cr=C, Cmatchr=Cmatch, 
                            Dr=D, dr=d, Mr=M, Hr=H, 
                            nCollegesr=nColleges, nStudentsr=nStudents, 
                            XXmatchr=XXmatch, CCr=CC, CCmatchr=CCmatch, Lr=L, 
                            studentIdsr=studentIds, collegeIdr=collegeId, 
                            n=n, N=N, 
                            binary=binary, niter=niter, thin=thin, T=T, censored=censored))
      })
    }
    
    rm(parObject, Ts, Ns)
    stopCluster(cl)
    
    ## split results into draws for paramters (res1) and error terms (res2)
    res1 <- lapply(res, function(z) z[!names(z) %in% c("etadraws", "deltadraws")])
    res2 <- lapply(res, function(z) z[names(z) %in% c("etadraws", "deltadraws")])
    rm(res)
    
    ## combine results from different cores for error terms
    est2 <- lapply(1:length(res2[[1]]), function(z){
      do.call(rbind, lapply(res2, function(d) d[[z]] ))
    })
    names(est2) <- names(res2[[1]]); rm(res2)
    
    ## combine results from different cores for parameter draws
    toarray <- function(x, y){
      array(unlist(lapply(x, function(z) z[[y]] )), 
            dim = c(nrow(res1[[1]][[y]]), ncol(res1[[1]][[y]]), length(res1)) )
    }
    est1 <- sapply(1:length(res1[[1]]), function(z){
      toarray( res1, names(res1[[1]])[z] )
    })
    names(est1) <- names(res1[[1]]); rm(res1)
    est1 <- lapply(est1, consensusMC)
    
    ## combine results for paramters (est1) and error terms (est2)
    est <- c(est1, est2); rm(est1, est2)
    
  } else{
    #Ts <- 1:T
    #Ns <- 1:N  
    
    if(method=="Klein"){
      
      est <- stabit2Sel1(Yr=Y, Xmatchr=Xmatch, Cr=C, Cmatchr=Cmatch, Sr=S, Smatchr=Smatch, Dr=D, dr=d,
                         Mr=M, Hr=H, nCollegesr=unlist(nColleges), nStudentsr=unlist(nStudents), XXmatchr=XXmatch, 
                         CCr=CC, SSr=SS, SSmatchr=SSmatch, CCmatchr=CCmatch, Lr=L,
                         studentIdsr=studentIds, collegeIdr=collegeId, nEquilibsr=nEquilibs,
                         equ2soptr=equ2sopt, sopt2equr=sopt2equ, equ2coptr=equ2copt, copt2equr=copt2equ,
                         coptidr=copt.id, n=n, N=N, binary=binary, niter=niter, thin=thin, T=T, censored=censored)
      
    } else if(method=="Klein-selection"){
      
      est <- stabit2Mat1(Cr=C, Cmatchr=Cmatch, Sr=S, Smatchr=Smatch, Dr=D, dr=d,
                         Mr=M, Hr=H, nCollegesr=unlist(nColleges), nStudentsr=unlist(nStudents),
                         CCr=CC, SSr=SS, SSmatchr=SSmatch, CCmatchr=CCmatch, Lr=L,
                         studentIdsr=studentIds, collegeIdr=collegeId, nEquilibsr=nEquilibs,
                         equ2soptr=equ2sopt, sopt2equr=sopt2equ, equ2coptr=equ2copt, copt2equr=copt2equ,
                         coptidr=copt.id, n=n, N=N, niter=niter, thin=thin, T=T)
      
    } else if(method=="Sorensen"){
      
      est <- stabit2Sel2(Yr=Y, Xmatchr=Xmatch, Cr=C, Cmatchr=Cmatch, Dr=D, dr=d,
                         Mr=M, Hr=H, nCollegesr=unlist(nColleges), nStudentsr=unlist(nStudents), XXmatchr=XXmatch, 
                         CCr=CC, CCmatchr=CCmatch, Lr=L, 
                         studentIdsr=studentIds, collegeIdr=collegeId, 
                         n=n, N=N, binary=binary, niter=niter, thin=thin, T=T, censored=censored)
    }
  }
  
  # ------------------------------------
  # --- 5. Add names to coefficients ---
  
  ## variable names
  
  if(method!="Klein-selection"){
    an <- colnames(Xmatch[[1]])
  }
  bn <- colnames(C[[1]])
  if(method=="Klein" | method=="Klein-selection"){
    cn <- colnames(S[[1]])
  }
  
  ## parameter draws
  
  rownames(est$betadraws) <- bn
  if(method=="Sorensen"){
    est$alphadraws <- with(est, rbind(alphadraws, kappadraws))
    rownames(est$alphadraws) <- c(an,"kappa")
    est$kappadraws <- NULL
  } else if(method=="Klein"){
    est$alphadraws <- with(est, rbind(alphadraws, kappadraws, lambdadraws))
    rownames(est$alphadraws) <- c(an,"kappa","lambda")
    est$kappadraws  <- NULL
    est$lambdadraws <- NULL
    rownames(est$gammadraws) = cn
  } else if(method=="Klein-selection"){
    rownames(est$gammadraws) = cn
  }
  
  ## posterior means
  
  ## The last half of all draws are used in approximating the posterior means and the posterior standard deviations.
  niter <- dim(est$betadraws)[2]
  startiter <- floor(niter/2)
  
  posMeans <- function(x){
    sapply(1:nrow(x), function(z) mean(x[z,startiter:niter]))
  }
  
  if(method!="Klein-selection"){
    est$alpha <- posMeans(est$alphadraws) 
    names(est$alpha) <- rownames(est$alphadraws)
  }
  
  est$beta <- posMeans(est$betadraws)
  names(est$beta) <- bn
  
  if(method=="Klein" | method=="Klein-selection"){
    est$gamma <- posMeans(est$gammadraws)
    names(est$gamma) <- cn
  }
  
  if(method!="Klein-selection" & binary==FALSE){
    est$sigma <- sqrt(posMeans(est$sigmasquarenudraws))
    est$sigmasquarenudraws <- NULL
  }
  
  ## error terms
  
  est$eta <- posMeans(est$etadraws)
  est$etadraws <- NULL
  if(method=="Klein" | method=="Klein-selection"){
    est$delta <- posMeans(est$deltadraws)
    est$deltadraws <- NULL
  }
  
  ## vcov
  
  if(method!="Klein-selection"){
    est$vcov$alpha <- var(t(est$alphadraws))
  }
  est$vcov$beta <- var(t(est$betadraws))
  if(method=="Klein" | method=="Klein-selection"){
    est$vcov$gamma <- var(t(est$gammadraws))
  }
  
  ## variables
  
  est$C <- do.call(rbind, C)
  est$D <- do.call(c, D)
  
  if(method!="Klein-selection"){
    est$Y <- do.call(c, Y)
    est$X <- do.call(rbind, Xmatch)
    
    if(method=="Klein"){
      est$X <- cbind(est$X, eta=est$eta, delta=est$delta)
      est$S <- do.call(rbind, S)  
    } else if(method=="Sorensen"){
      est$X <- cbind(est$X, eta=est$eta)
    }
  } else if(method=="Klein-selection"){
    est$S <- do.call(rbind, S)
  } 
  
  ## other output
  
  if(method=="Sorensen"){
    
    est$coefficients <- unlist(with(est, list(o=alpha, s=beta)))
    
  } else if(method=="Klein"){
    
    est$coefficients <- unlist(with(est, list(o=alpha, c=beta, s=gamma)))
    
  } else if(method=="Klein-selection"){
    
    est$coefficients <- unlist(with(est, list(c=beta, s=gamma)))
    
  }
  
  if(method!="Klein-selection"){
    est$fitted.values <- with(est, as.vector(X %*% alpha))
    est$residuals <- est$Y - est$fitted.values
    est$df <- n - ncol(est$X)
    est$binary <- binary
    est$formula <- outcome
  } else{
    #est$fitted.values <- with(est, as.vector(X %*% alpha))
    #est$residuals <- est$Y - est$fitted.values
    est$df <- n*N - ncol(est$C) - ncol(est$S)
    #est$binary <- binary
    #est$formula <- outcome
  }
  
  est$call <- match.call()
  est$method <- method
  
  ## consolidate
  
  h <- est[grep(pattern="draws",x=names(est))]
  est[grep(pattern="draws",x=names(est))] <- NULL
  est$draws <- h
  
  h <- est[which(names(est) %in% c("alpha","beta","gamma","kappa","lambda"))]
  est[which(names(est) %in% c("alpha","beta","gamma","kappa","lambda"))] <- NULL
  est$coefs <- h
  
  h <- est[which(names(est) %in% c("X","S","C","Y","D"))]
  est[which(names(est) %in% c("X","S","C","Y","D"))] <- NULL
  est$variables <- h
  
  h <- NULL
  
  # ------------------
  # --- 6. Returns ---
  
  class(est) <- "stabit2"
  return(est)
}




rFormula <- function(formula, data=list(), ...){
  mf <- model.frame(formula=formula, data=data)
  x <- model.matrix(attr(mf, "terms"), data=mf)
  y <- model.response(mf)
  as.data.frame(cbind(y,x))
}




consensusMC <- function(subchain){
  
  ddata = length(dim(subchain))
  
  d          <- dim(subchain)[1]  
  sampletotT <- dim(subchain)[2]    
  M          <- dim(subchain)[3]
  
  if ( M==1 ){ 
    theta <- array(subchain[,,1],c(d,sampletotT))
    return (theta)
  }
  
  ## compute sigmahatm & sigmahatm.inverse (= W_s)  
  sigmahatm     <- array(NA,c(d,d,M))    
  sigmahatM     <- matrix(NA,d,d)
  sigmahatM.pre <- matrix(NA,d,d)        
  if (d==1){
    for (k in 1:M) { sigmahatm[1,,k] <- var(subchain[1,,k]) }    
  } else{
    for (k in 1:M) { sigmahatm[,,k] <- cov(t(subchain[,,k])) }
  }
  
  ## compute inverses of covariance matrices (with try()):
  sigmahatm.inverse <- array(NA,dim=c(d,d,M))
  for (k in 1:M){    
    res <- try( sigmahatm.inverse[,,k] <- solve(sigmahatm[,,k]), silent=TRUE)  
  }            
  sigmahatM <- solve(rowSums(sigmahatm.inverse, dims=2)) # (sum W_s)^(-1)
  
  ## compute unified posterior samples
  theta <- matrix(NA,nrow=d,ncol=sampletotT) # the resulting posterior samples
  wvec  <- array(NA, c(d,1))
  for (i in 1:sampletotT){
    wvec <- rep(0,d)
    for (s in 1:M){
      wvec <- wvec + sigmahatm.inverse[,,s] %*% subchain[,i,s] 
    }
    theta[,i] <- sigmahatM %*% wvec        
  }
  return(theta)
}




stabit2_inner <- function(iter, OUT, SEL, SELs, SELc, colleges, students, 
                          m.id="m.id", c.id="c.id", s.id="s.id", 
                          outcome, selection, selection.student, selection.college, 
                          s.prefs, c.prefs, method){
  
  ## ----------------------------------------------------------------------------------------------
  ## --- 1. Sort datasets by colleges (c.id) and students (s.id), put equilibrium matches first ---
  
  if(!is.null(SEL)){
  
    ## sort SEL by c.id and s.id (based on OUT)
    idOUT <- with(OUT, paste(c.id, s.id, sep="_"))
    SEL$idSEL <- with(SEL, paste(c.id, s.id, sep="_"))
    
    ## which matches in SEL are in OUT?
    SEL_top <- SEL[SEL$idSEL %in% idOUT,]
    SEL_top <- SEL_top[match(idOUT, SEL_top$idSEL),]
    SEL_bot <- SEL[! SEL$idSEL %in% idOUT,]
    SEL <- rbind(SEL_top, SEL_bot)
    rm(SEL_top, SEL_bot)
    
    ## unique student and college ids
    indices <- SEL
    uColleges <- sort(unique(indices$c.id))
    uStudents <- sort(unique(indices$s.id))
    
  } else if(!is.null(SELs) & !is.null(SELc)){
    
    ## sort SELs/SELc by c.id and s.id (based on OUT)
    idOUT <- with(OUT, paste(c.id, s.id, sep="_"))
    SELs$idSEL <- with(SELs, paste(c.id, s.id, sep="_"))
    SELc$idSEL <- with(SELc, paste(c.id, s.id, sep="_"))
    
    ## which matches in SELs are in OUT?
    SEL_top <- SELs[SELs$idSEL %in% idOUT,]
    SEL_top <- SEL_top[match(idOUT, SEL_top$idSEL),]
    SEL_bot <- SELs[! SELs$idSEL %in% idOUT,]
    SELs <- rbind(SEL_top, SEL_bot)
    rm(SEL_top, SEL_bot)
    
    ## which matches in SELc are in OUT?
    SEL_top <- SELc[SELc$idSEL %in% idOUT,]
    SEL_top <- SEL_top[match(idOUT, SEL_top$idSEL),]
    SEL_bot <- SELc[! SELc$idSEL %in% idOUT,]
    SELc <- rbind(SEL_top, SEL_bot)
    rm(SEL_top, SEL_bot)
    
    ## unique student and college ids
    indices <- SELs
    uColleges <- sort(unique(indices$c.id))
    uStudents <- sort(unique(indices$s.id))
    
  } else{
    
    ## get college and student ids from OUT
    c.id <- OUT[,c.id]
    s.id <- OUT[,s.id]
    
    ## unique student and college ids
    uColleges <- sort(unique(c.id))
    uStudents <- sort(unique(s.id))
    
    ## all feasible combinations
    combs <- function(uColleges, uStudents){
      nColleges <- length(uColleges)
      nStudents <- length(uStudents)
      data.frame( c.id = c(sapply(uColleges, function(i){ rep(i, nStudents) })), 
                  s.id = rep(uStudents, nColleges), 
                  stringsAsFactors=FALSE )
    }
    indices <- as.data.frame(combs(uColleges, uStudents))
    
    ## index equilibrium matches from x in indices
    indices$id <- paste(indices$c.id, indices$s.id, sep="_")
    OUT$id    <- paste(OUT$c.id, OUT$s.id, sep="_")
    
    ## sort indices such that observed matches come first
    indices$D <- ifelse(indices$id %in% OUT$id, 1, 0)
    indices <- indices[order(indices$D, decreasing=TRUE),]
    
    ## sort OUT such that it concordes with indices
    OUT    <- OUT[match(indices$id[indices$D==1], OUT$id), ] 
  }
  
  ## -----------------------------------------------------------------------------------------
  ## --- 2. Produce matrices indicating the matches (H) and their position in the data (M) ---
  
  ## matrices are of dimension nColleges x nStudents
  ind.new       <- indices
  ind.new$index <- 1:nrow(ind.new)
  ind.org <- ind.new[order(ind.new$c.id,ind.new$s.id),]
  M <- matrix(ind.org$index, nrow=length(uColleges), ncol=length(uStudents), byrow=TRUE)
  H <- matrix(ind.org$D, nrow=length(uColleges), ncol=length(uStudents), byrow=TRUE)  
  
  if(method == "Klein" | method=="Klein-selection"){
    
    ## preference inputs
    s.prefs <- s.prefs[[iter]]
    c.prefs <- c.prefs[[iter]]
    nSlots <- rowSums(H)
    
    ## find all stable matchings
    res <- hri(s.prefs=s.prefs, c.prefs=c.prefs, nSlots=nSlots)$matchings
    copt.id <- unique(res$matching[res$cOptimal==1]) # college-optimal matching
    res <- split(res, as.factor(res$matching))
    
    ## create adjacency matrices for all equilibrium matchings
    myfun <- function(x, type){
      H <- array(0, dim=c(length(unique(x[[1]][,type])), nrow(x[[1]]), length(x)))
      for(j in 1:length(x)){
        for(z in 1:nrow(x[[1]])){
          H[x[[j]][z,type], x[[j]][z,"student"], j] <- 1
        }
      }
      return(H)
    }
    H1 <- myfun(x=res, type="college"); names(H1) <- NULL # college admissions problem
    H2 <- myfun(x=res, type="slots"); names(H2) <- NULL # related stable marriage problem
    
    ## consistency check
    if( length( table(c(H) == c(H1[,,1]))) > 1 ){
      stop(paste("Data provided is not the student-optimal matching obtained from the 
                 preference lists in market ", iter, ".", sep=""))
    } else{
      H <- H1 # replace matrix for student-optimal matching (H) with all matchings (H1)
      rm(H1)
    }
  }
  
  ## ---------------------------------------------------------------------
  ## --- 3. Produce the datasets to be used based on formulas provided ---
  
  if(is.null(SEL) & is.null(SELs) & is.null(SELc)){
    
    c.OUT <- OUT[!duplicated(OUT$c.id),]
    C <- data.frame(apply(data.frame(c.OUT[,colleges]), 2, function(i) i[match(indices$c.id,c.OUT$c.id)]))
    names(C) <- colleges
    
    s.OUT <- OUT[!duplicated(OUT$s.id),]
    S <- data.frame(apply(data.frame(s.OUT[,students]), 2, function(i) i[match(indices$s.id,s.OUT$s.id)]))
    names(S) <- students
    
    ## ... and interaction effects
    Xmain <- data.frame(C, S)
    Xmatch <- rFormula(formula = outcome, data=OUT)
    
    if(method=="Klein"){
      
      C <- rFormula(formula = selection.college, data=Xmain)
      S <- rFormula(formula = selection.student, data=Xmain)    
      
    } else if(method == "Sorensen"){
      
      C <- rFormula(formula = selection, data=Xmain)
    }
    
  } else{
    
    if(method!="Klein-selection"){
      Xmatch <- rFormula(formula = outcome, data=OUT)
    } else{
      Xmatch <- rFormula(formula = selection.student, data=OUT)
    }
    
    if(method=="Klein" | method=="Klein-selection"){
      
      C <- rFormula(formula = selection.college, data=SELc)
      S <- rFormula(formula = selection.student, data=SELs)
      
    } else if(method == "Sorensen"){
      
      C <- rFormula(formula = selection, data=SEL)
    }
  }
  
  D <- indices$D
  d <- which(D==1)
  
  ## -----------------------------
  ## --- 4. Return the results ---
  
  if(method=="Klein" | method=="Klein-selection"){
    
    return( list(Y=Xmatch[,1], Xmatch=Xmatch[,-1], C=C, Cmatch=C[D==1,], S=S, Smatch=S[D==1,], 
                 D=D, d=d, M=M, H=H, H2=H2, copt.id=copt.id, indices=indices$id) )
    
  } else if(method=="Sorensen"){
    
    return( list(Y=Xmatch[,1], Xmatch=Xmatch[,-1], C=C, Cmatch=C[D==1,], 
                 D=D, d=d, M=M, H=H, indices=indices$id) )
  }  
}  

Try the matchingMarkets package in your browser

Any scripts or data that you put into this service are public.

matchingMarkets documentation built on May 2, 2019, 4:49 p.m.