Nothing
#' Propensity score matching
#'
#' Propensity score matching is implemented to select external controls who are
#' more relevant to patients in the current trial with respect to covariates
#' of interest.
#' @usage
#' psmatch(
#' formula, data, n.EC,
#' method.psest="glm", method.pslink="logit",
#' method.whomatch, method.matching, method.psorder, n.boot=100)
#' @param formula Object of class \code{formula}, which is a symbolic
#' description of the propensity score model to be fitted. The dependent
#' variable must be named \code{study}. The explanatory variables only include
#' covariates of interest, which must be specified in the form of linear
#' combination.
#' @param data Data frame, which must contain variables named \code{study} for
#' study indicator (0 for external control, and 1 for current trial) and
#' \code{treat} for treatment indicator (0 for concurrent and external control,
#' and 1 for treatment).
#' @param n.EC Number of patients in external control to be selected, which must
#' be smaller than the number of patients in external control pool.
#' @param method.psest Method of estimating the propensity score. Allowable
#' options include, for example, \code{"glm"} for generalized linear model
#' (e.g., logistic regression); \code{"gam"} for generalized additive model;
#' \code{"gbm"} for generalized boosted model; \code{"lasso"} for lasso
#' regression; \code{"rpart"} for classification tree. The default value is
#' \code{method.psest="glm"}.
#' @param method.pslink Link function used in estimating the propensity score.
#' Allowable options depend on the specific \code{method.psest} value specified.
#' The default value is \code{method.pslink="logit"}, which, along with
#' \code{method.psest="glm"}, identifies the default method as logistic
#' regression.
#' @param method.whomatch Options of who to match. Allowable options include
#' \code{conc.contl} for matching concurrent control to external control pool;
#' \code{conc.treat} for matching treatment to external control pool;
#' \code{conc.all} for matching treatment plus concurrent control to external
#' control pool; \code{treat2contl} for matching treatment to concurrent control
#' plus external control pool.
#' @param method.matching Propensity score matching method. Allowable options include
#' \code{"optimal"} for optimal matching; \code{"nearest"} for nearest neighbor
#' matching without replacement; \code{"med.optimal"} for equally splitting
#' patients in the current trial and taking the median of each subset, followed
#' by 1:1 optimal matching; \code{"med.nearest"} for equally splitting
#' patients in the current trial and taking the median of each subset, followed
#' by 1:1 nearest neighbor matching without replacement; \code{"km.optimal"} for
#' k-means clustering of patients in the current trial, followed by 1:1 optimal
#' matching; \code{"km.nearest"} for k-means clustering of patients in the
#' current trial, followed by 1:1 nearest neighbor matching without replacement;
#' \code{"cm.optimal"} for fuzzy c-means clustering of patients in the current
#' trial, followed by 1:1 optimal matching; \code{"cm.nearest"} for fuzzy
#' c-means of patients in the current trial, followed by 1:1 nearest neighbor
#' matching without replacement; \code{"boot.optimal"} for bootstrap sampling
#' from patients in the current trial, followed by 1:1 optimal matching;
#' \code{"boot.nearest"} for bootstrap sampling from patient in the current
#' trial, followed by 1:1 nearest neighbor matching without replacement.
#' @param method.psorder Order that the matching takes place when a nearest
#' neighbor matching is used. Allowable options include \code{"largest"},
#' where matching takes place in descending order of propensity score;
#' \code{"smallest"}, where matching takes place in ascending order of
#' propensity score; \code{"random"}, where matching takes place in a random
#' order; \code{"data"}, where matching takes place based on the order of units
#' in the data. The matching order must be specified when using the nearest
#' neighbor matching.
#' @param n.boot Number of bootstrap sampling, which must be specified when
#' \code{method.matching="boot.optimal"} or
#' \code{method.matching="boot.nearest"}. The default value is \code{n.boot=100}.
#' @details The propensity score is defined as the conditional probability of
#' having been included in the current trial given observed covariates. There
#' are four options applicable for to whom the patients in external control
#' pool are matched, including (i) concurrent control versus external control
#' pool (\code{"conc.contl"}), (ii) treatment versus external control pool
#' (\code{"conc.treat"}), (iii) treatment plus concurrent control versus
#' external control pool (\code{"conc.all"}), and (iv) treatment versus
#' concurrent control plus external control pool (\code{"treat2contl"}).
#' Along with \code{method.whomatch="conc.contl"}, two 1:1 matching methods are
#' applicable: (1) optimal matching (\code{"optimal"}), and (2) nearest neighbor
#' matching without caliper (\code{"nearest"}). Along with
#' \code{method.whomatch="conc.treat"} or \code{method.whomatch="conc.all"},
#' ten matching methods are applicable: (1) optimal matching, where 1:1
#' matching is first done, followed by random sampling (\code{"optimal"}),
#' (2) nearest neighbor matching, where caliper is tuned iteratively
#' to obtain the fixed number of external controls (\code{"nearest"}), (3)
#' equally splitting patients in the current trial and taking the median of
#' each subset, followed by 1:1 optimal matching (\code{"medm.optimal"}), (4)
#' equally splitting patients in the current trial and taking the median of
#' each subset, followed by 1:1 nearest neighbor matching matching
#' (\code{"med.nearest"}), (5) k-means clustering of patients in the current
#' trial, followed by 1:1 optimal matching (\code{"km.optimal"}), (6) k-means
#' clustering of patients in the current trial, followed by 1:1 nearest neighbor
#' matching (\code{"km.nearest"}), (7) fuzzy c-means clustering of patients in
#' the current trial, followed by 1:1 optimal matching (\code{"cm.optimal"}),
#' (8) fuzzy c-means of patients in the current trial, followed by 1:1 nearest
#' neighbor matching (\code{"cm.nearest"}), (9) bootstrap sampling from patients
#' in the current trial, followed by 1:1 optimal matching
#' (\code{"boot.nearest"}), and (10) bootstrap sampling from patient in the
#' current trial, followed by 1:1 nearest neighbor matching
#' (\code{"boot.nearest"}). Along with \code{method.whomatch="treat2contl"},
#' two matching methods are applicable: (1) optimal matching, followed by
#' random sampling (\code{"optimal"}), and (2) nearest neighbor matching, where
#' caliper is tuned iteratively to obtain the fixed number of external controls
#' (\code{"nearest"}).
#' @return
#' The \code{psmatch} returns a list containing the following objects:
#' \item{subjid.EC}{Vector of subject ID of external control.}
#' \item{data.ps}{Data frame with estimated propensity score.}
#' @references
#' Austin PC. A comparison of 12 algorithms for matching on the propensity
#' score. *Statistics in Medicine* 2014; 33:1057-1069.
#'
#' Lin J, Gamalo-Siebers M, Tiwari R. Propensity score matched augmented
#' controls in randomized clinical trials: A case study.
#' *Pharmaceutical Statistics* 2018; 17:629-647.
#'
#' Lin J, Gamalo-Siebers M, Tiwari R. Propensity-score-based priors for
#' Bayesian augmented control design. *Pharmaceutical Statistics* 2019;
#' 18:223-238.
#'
#' Rosenbaum PR. Optimal matching for observational studies.
#' *Journal of the American Statistical Association* 1989; 84:1024-1032.
#'
#' Sawamoto R, Oba K, Matsuyama Y. Bayesian adaptive randomization design
#' incorporating propensity score-matched historical controls.
#' *Pharmaceutical Statistics* 2022; 21:1074-1089.
#' @examples
#' n.CT <- 100
#' n.CC <- 50
#' nevent.C <- 100
#' n.ECp <- 1000
#' nevent.ECp <- 800
#' accrual <- 16
#'
#' out.mevent.CT <- 6
#' out.mevent.CC <- 6
#' driftHR <- 1
#'
#' cov.C <- list(list(dist="norm",mean=0,sd=1,lab="cov1"),
#' list(dist="binom",prob=0.4,lab="cov2"))
#'
#' cov.cor.C <- rbind(c( 1,0.1),
#' c(0.1, 1))
#'
#' cov.EC <- list(list(dist="norm",mean=0,sd=1,lab="cov1"),
#' list(dist="binom",prob=0.4,lab="cov2"))
#'
#' cov.cor.EC <- rbind(c( 1,0.1),
#' c(0.1, 1))
#'
#' cov.effect <- c(0.1,0.1)
#'
#' indata <- trial.simulation.t2e(
#' n.CT=n.CT, n.CC=n.CC, nevent.C=nevent.C,
#' n.ECp=n.ECp, nevent.ECp=nevent.ECp, accrual=accrual,
#' out.mevent.CT, out.mevent.CC, driftHR,
#' cov.C=cov.C, cov.cor.C=cov.cor.C,
#' cov.EC=cov.EC, cov.cor.EC=cov.cor.EC, cov.effect=cov.effect)
#'
#' n.EC <- 50
#'
#' method.whomatch <- "conc.treat"
#' method.matching <- "optimal"
#' method.psorder <- NULL
#'
#' psmatch(
#' study~cov1+cov2, data=indata, n.EC=n.EC,
#' method.whomatch=method.whomatch, method.matching=method.matching,
#' method.psorder=method.psorder)
#' @import MatchIt e1071 stats
#' @rawNamespace import(optmatch,except=strata)
#' @export
psmatch <- function(
formula, data, n.EC,
method.psest="glm", method.pslink="logit",
method.whomatch, method.matching, method.psorder, n.boot=100)
{
mf <- stats::model.frame(formula=formula,data=data)
cov.lab <- attr(attr(mf,"terms"),"term.labels")
if(length(data$study)==0){
stop("Dataset must contain a variable of study.")
}else if(length(data$treat)==0){
stop("Dataset must contain a variable of treat.")
}else{
bootmatch.optimal <- function(nwps,dmat)
{
boot.data.match <- rbind(data.frame(match=1,ps=nwps),dmat[dmat$match==0,])
bootmatching <- MatchIt::matchit(match~.,
data = boot.data.match,
method = "optimal",
distance = boot.data.match[,"ps"])
subjid.EC <- as.numeric(bootmatching$match.matrix)
return(subjid.EC)
}
bootmatch.nearest <- function(nwps,dmat,method.psorder)
{
boot.data.match <- rbind(data.frame(match=1,ps=nwps),dmat[dmat$match==0,])
bootmatching <- MatchIt::matchit(match~.,
data = boot.data.match,
method = "nearest",
distance = boot.data.match[,"ps"],
m.order = method.psorder)
subjid.EC <- as.numeric(bootmatching$match.matrix)
return(subjid.EC)
}
medps <- function(invet.ps,n.EC)
{
n.C <- length(invet.ps)
wk.n1 <- floor(n.C/n.EC)
wk.n2 <- n.C-wk.n1*n.EC
wk.n3 <- sample(n.EC,wk.n2)
wk.n4 <- matrix(c(1:n.EC,rep(wk.n1,n.EC)),n.EC,2)
wk.n4[wk.n3,2] <- wk.n1+1
en <- NULL
tmp <- 0
for(i in 1:n.EC){
en <- c(en,wk.n4[i,2]+tmp)
tmp <- tmp+wk.n4[i,2]
}
st <- c(1,en[-n.EC]+1)
wk.ps1 <- sort(invet.ps)
wk.ps2 <- apply(as.matrix(1:n.EC),1,function(x){stats::median(wk.ps1[st[x]:en[x]])})
return(wk.ps2)
}
ncov <- length(cov.lab)
data.cov <- data.frame(data[,c("study","treat",cov.lab)],
conc.contl=0,conc.treat=0,conc.all=0,treat2contl=0)
CT.flg <- (data.cov$study==1)&(data.cov$treat==1)
CC.flg <- (data.cov$study==1)&(data.cov$treat==0)
data.cov[CT.flg,"conc.contl"] <- 99
data.cov[CT.flg,"conc.treat"] <- 1
data.cov[CT.flg,"conc.all"] <- 1
data.cov[CT.flg,"treat2contl"] <- 1
data.cov[CC.flg,"conc.contl"] <- 1
data.cov[CC.flg,"conc.treat"] <- 99
data.cov[CC.flg,"conc.all"] <- 1
data.cov[CC.flg,"treat2contl"] <- 0
fit.ps <- MatchIt::matchit(study~.,
data = data.cov[,c("study",cov.lab)],
method = NULL,
distance = method.psest,
link = method.pslink)
data.cov.ps <- data.frame(data.cov,ps=as.vector(fit.ps$distance))
match.pts <- (data.cov.ps[,method.whomatch]!=99)
data.match <- data.cov.ps[match.pts,c(method.whomatch,"ps")]
colnames(data.match) <- c("match","ps")
if(method.whomatch=="conc.contl"){
n.CC <- sum(CC.flg)
ratio <- n.EC/n.CC
if(method.matching=="optimal"){
fit.match <- MatchIt::matchit(match~.,
data = data.match,
method = method.matching,
distance = data.match[,"ps"],
ratio = ratio)
subjid.EC <- as.numeric(fit.match$match.matrix)
}else if(method.matching=="nearest"){
fit.match <- MatchIt::matchit(match~.,
data = data.match,
method = method.matching,
distance = data.match[,"ps"],
m.order = method.psorder,
ratio = ratio)
subjid.EC <- as.numeric(fit.match$match.matrix)
}
}else if((method.whomatch=="conc.treat")|(method.whomatch=="conc.all")){
if(method.matching=="optimal"){
fit.match <- MatchIt::matchit(match~.,
data = data.match,
method = method.matching,
distance = data.match[,"ps"])
subjid.EC <- sample(as.numeric(fit.match$match.matrix),n.EC)
}else if(method.matching=="nearest"){
ps.M1 <- data.match[data.match[,"match"]==1,"ps"]
ps.M0 <- data.match[data.match[,"match"]==0,"ps"]
ps.M <- sort(abs(apply(as.matrix(ps.M1),1,function(x){x-ps.M0})))
ncal <- n.EC
while(1){
fit.match <- MatchIt::matchit(match~.,
data = data.match,
method = method.matching,
distance = data.match[,"ps"],
m.order = method.psorder,
caliper = ps.M[ncal],
std.caliper = FALSE)
if(sum(!is.na(fit.match$match.matrix))>=n.EC){
nearest <- fit.match$match.matrix
break
}else{
ncal <- ncal+1
}
}
subjid.EC <- sample(as.numeric(nearest[!is.na(nearest),]),n.EC)
}else if((method.matching=="boot.optimal")|(method.matching=="boot.nearest")){
pre.new.ps <- data.match[data.match$match==1,"ps"]
boot <- replicate(n.boot,sample(pre.new.ps,n.EC))
if(method.matching=="boot.optimal"){
bs.match <- apply(boot,2,function(x){return(bootmatch.optimal(nwps=x,dmat=data.match))})
}else if(method.matching=="boot.nearest"){
bs.match <- apply(boot,2,function(x){return(bootmatch.nearest(nwps=x,dmat=data.match,method.psorder=method.psorder))})
}
ECp.lab <- as.numeric(rownames(data.match[data.match$match==0,]))
bc.prob <- apply(as.matrix(ECp.lab),1,function(x){return(mean(bs.match==x))})
subjid.EC <- ECp.lab[order(bc.prob,decreasing=TRUE)[1:n.EC]]
}else if((method.matching=="med.optimal")|(method.matching=="med.nearest")|
(method.matching=="km.optimal")|(method.matching=="km.nearest")|
(method.matching=="cm.optimal")|(method.matching=="cm.nearest")){
pre.new.ps <- data.match[data.match$match==1,"ps"]
if((method.matching=="med.optimal")|(method.matching=="med.nearest")){
new.ps <- medps(invet.ps=pre.new.ps,n.EC=n.EC)
}else if((method.matching=="km.optimal")|(method.matching=="km.nearest")){
new.ps <- as.vector(stats::kmeans(pre.new.ps,n.EC)$centers)
}else if((method.matching=="cm.optimal")|(method.matching=="cm.nearest")){
new.ps <- as.vector(e1071::cmeans(pre.new.ps,n.EC)$centers)
}
new.data.match <- rbind(data.frame(match=1,ps=new.ps),data.match[data.match$match==0,])
if((method.matching=="med.optimal")|(method.matching=="km.optimal")|(method.matching=="cm.optimal")){
fit.match <- MatchIt::matchit(match~.,
data = new.data.match,
method = "optimal",
distance = new.data.match[,"ps"])
}else if((method.matching=="med.nearest")|(method.matching=="km.nearest")|(method.matching=="cm.nearest")){
fit.match <- MatchIt::matchit(match~.,
data = new.data.match,
method = "nearest",
distance = new.data.match[,"ps"],
m.order = method.psorder)
}
subjid.EC <- as.numeric(fit.match$match.matrix)
}
}else if(method.whomatch=="treat2contl"){
if(method.matching=="optimal"){
fit.match <- MatchIt::matchit(match~.,
data = data.match,
method = method.matching,
distance = data.match[,"ps"])
subjid.C1 <- as.numeric(fit.match$match.matrix)
subjid.C2 <- subjid.C1[data.cov.ps[subjid.C1,"study"]==0]
if(length(subjid.C2)>=n.EC){
subjid.EC <- sample(subjid.C2,n.EC)
}else{
subjid.EC <- subjid.C2
}
}else if(method.matching=="nearest"){
ps.M1 <- data.match[data.match[,"match"]==1,"ps"]
ps.M0 <- data.match[data.match[,"match"]==0,"ps"]
ps.M <- sort(abs(apply(as.matrix(ps.M1),1,function(x){x-ps.M0})))
ncal <- n.EC
while(1){
fit.match <- MatchIt::matchit(match~.,
data = data.match,
method = method.matching,
distance = data.match[,"ps"],
m.order = method.psorder,
caliper = ps.M[ncal],
std.caliper = FALSE)
subjid.C1 <- fit.match$match.matrix
subjid.C2 <- as.numeric(subjid.C1[!is.na(subjid.C1)])
subjid.C3 <- subjid.C2[data.cov.ps[subjid.C2,"study"]==0]
if(length(subjid.C3)>=n.EC){
nearest <- subjid.C3
break
}else{
ncal <- ncal+1
}
}
subjid.EC <- sample(nearest,n.EC)
}
}
return(list(subjid.EC=subjid.EC,data.ps=data.cov.ps[,c("study","treat",cov.lab,"ps")]))
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.