Nothing
#' Compute the population distribution of pairs and singles from a Revealed Preference Matchings Model
#'
#' \code{\link{rpmpopulationpmf}} computes the probability mass function for a population of the pairs and singles
#' from a Revealed Preference Matchings Model based on arbitary availability distribution and
#' preferences. It is typically based on the estimate from a \code{rpm()} call.
#'
#' The function \code{\link{rpm}} is used to fit a revealed preference model
#' for men and women of certain
#' characteristics (or shared characteristics) of people of the opposite sex.
#' The model assumes a one-to-one stable matching using an observed set of
#' matchings and a set of (possibly dyadic) covariates to
#' estimate the parameters for
#' linear equations of utilities.
#' It does this using an large-population likelihood based on ideas from Dagsvik (2000), Menzel (2015) and Goyal et al (2023).
#'
#' The model represents the dyadic utility functions as deterministic linear utility functions of
#' dyadic variables. These utility functions are functions of observed characteristics of the women
#' and men.
#' These functions are entered as terms in the function call
#' to \code{\link{rpm}}. This function simulates from such a model.
#'
#' @param object list; an object of class\code{rpm} that is typically the result of a call to \code{rpm()}.
#' @param N integer; The total population size. This must be set. The number of women and men are derived from the
#' (weighted) data.
#' @param num_women integer; (Optional) The number of women in the population.
#' @param num_men integer; (Optional) The number of men in the population.
#' @param pmfW vector; (Optional) The population proportions of the numbers of women of each type.
#' This should be compatible with the type in the object.
#' @param pmfM vector; (Optional) The population proportions of the numbers of men of each type.
#' This should be compatible with the type in the object.
#' @param verbose logical; Should verbose messages be printed out.
#' @return A list of data.frame, each a simulation from the population.
#' @keywords models
#' @examples
#' library(rpm)
#' data(fauxmatching)
#' \donttest{
#' fit <- rpm(~match("edu") + WtoM_diff("edu",3),
#' Xdata=fauxmatching$Xdata, Zdata=fauxmatching$Zdata,
#' X_w="X_w", Z_w="Z_w",
#' pair_w="pair_w", pair_id="pair_id", Xid="pid", Zid="pid",
#' sampled="sampled")
#' a <- rpmpopulationpmf(fit)
#' }
#' @references
#'
#' Goyal, Shuchi; Handcock, Mark S.; Jackson, Heide M.; Rendall, Michael S. and Yeung, Fiona C. (2023).
#' \emph{A Practical Revealed Preference Model for Separating Preferences and Availability Effects in Marriage Formation},
#' \emph{Journal of the Royal Statistical Society}, A. \doi{10.1093/jrsssa/qnad031}
#'
#' Dagsvik, John K. (2000) \emph{Aggregation in Matching Markets} \emph{International Economic Review},, Vol. 41, 27-57.
#' JSTOR: https://www.jstor.org/stable/2648822, \doi{10.1111/1468-2354.00054}
#'
#' Menzel, Konrad (2015).
#' \emph{Large Matching Markets as Two-Sided Demand Systems}
#' Econometrica, Vol. 83, No. 3 (May, 2015), 897-941. \doi{10.3982/ECTA12299}
#' @export rpmpopulationpmf
rpmpopulationpmf <- function(object, N = 2000, num_women=NULL, num_men=NULL, pmfW=NULL, pmfM=NULL, verbose = FALSE)
{
if(!(is(object, "rpm"))){stop("'rpmpopulationpmf' only works on objects of class 'rpm'.")}
if (object$sampling_design != "census") {
# Presumes individual weights
N_w = sum(object$Xdata[object$Xdata[,object$sampled] & is.na(object$Xdata[,object$pair_id]),object$X_w])
N_w = N_w + sum(object$Zdata[object$Zdata[,object$sampled] & !is.na(object$Zdata[,object$pair_id]),object$Z_w]) # number of women
N_m = sum(object$Zdata[object$Zdata[,object$sampled] & is.na(object$Zdata[,object$pair_id]),object$Z_w])
N_m = N_m + sum(object$Xdata[object$Xdata[,object$sampled] & !is.na(object$Xdata[,object$pair_id]),object$X_w]) # number of men
}else{
N_w = nrow(object$Xdata)
N_m = nrow(object$Zdata)
if(is.null(object$sampled)){
object$sampled <- "sampled"
object$Xdata$sampled <- rep(TRUE,nrow(object$Xdata))
object$Zdata$sampled <- rep(TRUE,nrow(object$Zdata))
}else{
object$Xdata[[object$sampled]] <- rep(TRUE,nrow(object$Xdata))
object$Zdata[[object$sampled]] <- rep(TRUE,nrow(object$Zdata))
}
}
if(missing(N)){
N = N_w + N_m
gw = log(N_w/N) # to ensure exp(gw)+exp(gm) = 1
gm = log(N_m/N) # to ensure exp(gw)+exp(gm) = 1
}
if(!missing(num_women) & !missing(num_men)){
N <- num_men + num_women
gw = log(num_women/N)
gm = log(num_men/N)
}else{
if(missing(num_women)){
if(!missing(num_men)){
if(num_men >= N){stop("The number of people in the population should be at least as large as the number of men.")}
num_women = N-num_men
gw = log(num_women/N) # to ensure exp(gw)+exp(gm) = 1
gm = log(num_men/N) # to ensure exp(gw)+exp(gm) = 1
}else{
# missing num_women, num_men
if(missing(N)){
num_women <- N_w
num_men <- N_m
N <- N_w + N_m
}else{
num_women <- N*N_w/(N_w+N_m)
num_men <- N*N_m/(N_w+N_m)
gw = log(num_women/N)
gm = log(num_men/N)
}
}
}else{ # !missing(num_women) & missing(num_men)
if(num_women >= N){stop("The number of people in the population should be at least as large as the number of women.")}
num_men = N-num_women
gw = log(num_women/N) # to ensure exp(gw)+exp(gm) = 2
gm = log(num_men/N) # to ensure exp(gw)+exp(gm) = 2
}
}
if(!missing(pmfW)){pmfWN <- pmfW}else{pmfWN <- object$pmfW}
if(!missing(pmfM)){pmfMN <- pmfM}else{pmfMN <- object$pmfM}
num_women = round(N*exp(gw))
num_men = round(N*exp(gm))
numX <- nrow(object$pmf)-1
numZ <- ncol(object$pmf)-1
NumBeta <- object$NumBeta
NumGamma <- object$NumGamma
NumGammaW <- object$NumGammaW
NumGammaM <- object$NumGammaM
th_hat <- object$coefficients
pmf_target <- exp(logpmfest(object$coefficients[1:object$NumBeta],
GammaW=object$coefficients[object$NumBeta+(1:object$NumGammaW)],
GammaM=object$coefficients[(object$NumBeta+object$NumGammaW)+(1:object$NumGammaM)],
object$Sd, object$Xd, object$Zd,
pmfWN/sum(pmfWN), pmfMN/sum(pmfMN), gw=gw, gm=gm))
pmf_target[nrow(pmf_target),ncol(pmf_target)] <- 0
pmf_target[-nrow(pmf_target), -ncol(pmf_target)] <- 2*pmf_target[-nrow(pmf_target), -ncol(pmf_target)]
pmf_target <- pmf_target/sum(pmf_target)
out <- list(pmf=pmf_target)
PMF_SW <- pmf_target[-nrow(pmf_target), ncol(pmf_target),drop=FALSE]
out$PMF_SW <- PMF_SW / (PMF_SW + 0.5*apply(pmf_target[ -nrow(pmf_target),-ncol(pmf_target),drop=FALSE],1,sum))
PMF_SM <- pmf_target[ nrow(pmf_target),-ncol(pmf_target),drop=FALSE]
out$PMF_SM <- PMF_SM / (PMF_SM + 0.5*apply(pmf_target[ -nrow(pmf_target),-ncol(pmf_target),drop=FALSE],2,sum))
PMF_PW <- pmf_target[-nrow(pmf_target), ncol(pmf_target),drop=FALSE]
out$PMF_PW <- 1 - out$PMF_SW
out$PMF_PM <- 1 - out$PMF_SM
names(out$PMF_SW) <- names(object$PMF_SW)
names(out$PMF_SM) <- names(object$PMF_SM)
names(out$PMF_PW) <- names(object$PMF_PW)
names(out$PMF_PM) <- names(object$PMF_PM)
if(object$control$logodds_single){
b <- sum(out$PMF_SW*pmfWN)
out$LOGODDS_SW <- th_hat[(NumBeta+1):(NumBeta+NumGammaW)] - log(b/(1-b))
b <- sum(out$PMF_SM*pmfMN)
out$LOGODDS_SM <- th_hat[(NumBeta+NumGammaW+1):(NumBeta+NumGammaW+NumGammaM)] - log(b/(1-b))
}else{
out$LOGODDS_SW <- log(out$PMF_SW/(1-out$PMF_SW))
out$LOGODDS_SM <- log(out$PMF_SM/(1-out$PMF_SM))
}
names(out$LOGODDS_SW) <- names(object$LOGODDS_SW)
names(out$LOGODDS_SM) <- names(object$LOGODDS_SM)
return(out)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.