#####################################################################################
#
# Apply data example for Win Ratio
#
# Di Zhang
# 2/5/2018
# Last modified: 3/21/2019
#
#####################################################################################
WR.CRT<-function(treatment, cluster, y1, y2, delta1, delta2, null.WR=1, alpha.sig=0.05){
#
# #---------------------------- Data analysis
# # input data set
# data<-read.csv(paste(path.in, "clusterData.csv", sep=""), header = T, stringsAsFactors = F)
# data$treatment[data$treatment==1]<-0
# data$treatment[data$treatment==2]<-1
#
#
# # define package input arguements
# treatment=data$treatment
# cluster=data$oinst # if it's independent analysis without clustering, then set each individual a unique cluster ID. e.g. sequence number from 1 to n.
# y1=data$Y1
# y2=data$Y2
# delta1=data$delta1
# delta2=data$delta2
# null.WR=1
# alpha.sig=0.05
#
ll<-list(treatment, cluster, y1, y2, delta1, delta2)
# ---------------- warning message:
# check if all vectors input have the same length
llen<-sapply(ll, function(x) length(x))
if(sum(llen)!=length(treatment)*length(llen)) stop("Error: Input arguements 'treatment','cluster','y1','y2','delta1', and 'delta2' have to be numeric vectors with the same length")
# check if all vectors input are numeric vectors
lna<-sapply(ll, function(x) is.numeric(x))
if(!(all(lna==T))) stop("Error: All input arguements have to be numeric vectors")
# check if treatment is coded as 0 or 1
if(!(all(treatment%in%c(0,1)))) stop("Error: Treatment status has to be coded as 0 (control) or 1 (treatment)")
# check if cluster ID is a positive interger vector
if(!((all(cluster == floor(cluster)))&(all(cluster>0))))stop("Error: Cluster ID has to be positive integer numbers")
# check if all event times are all non-negative
if(!all(y1>=0)) stop("Error: All time to non-fatal event have to be non-negative")
if(!all(y2>=0)) stop("Error: All time to fatal event have to be non-negative")
# check if delta1 and delta2 are coded as 0 and 1
if(!all(delta1%in%c(0,1))) stop("Error: Vector delta1 can only contain 0 or 1")
if(!all(delta2%in%c(0,1))) stop("Error: Vector delta2 can only contain 0 or 1")
# form a data set
data<-data.frame(treatment, cluster, y1, y2, delta1, delta2)
# rank data by treatment and then by cluster
data<-data[order(data$treatment, data$cluster),]
# obtain number of clusters per comparison group
n<-length(unique(data$cluster[data$treatment==0])) # number of control clusters
m<-length(unique(data$cluster[data$treatment==1])) # number of treatment clusters
J<-n+m
# Define some common quantities;
Nj_c<-as.numeric(table(data$cluster[data$treatment==0]))
Nj_t<-as.numeric(table(data$cluster[data$treatment==1]))
Jm_bar<-mean(Nj_t)
Ln_bar<-mean(Nj_c)
Jm_bar2<-mean(sapply(Nj_t, function(x) x*(x-1)))
Ln_bar2<-mean(sapply(Nj_c, function(x) x*(x-1)))
xy12.den<-m*Jm_bar*((n^2)*(Ln_bar^2)-n*Ln_bar2-n*Ln_bar)
x12y.den<-n*Ln_bar*(m^2*Jm_bar^2-m*Jm_bar2-m*Jm_bar)
xi12GTy12.den<-m*Jm_bar2*(n^2*Ln_bar^2-n*Ln_bar2-n*Ln_bar)
x12GTyi12.den<-n*Ln_bar2*(m^2*Jm_bar^2-m*Jm_bar2-m*Jm_bar)
# Add comparison scenarios for each observation.
data$cat<-ifelse((data$delta1==1)&(data$delta2==1), 1,
ifelse((data$delta1==0)&(data$delta2==1),2,
ifelse((data$delta1==0)&(data$delta2==0),3,
ifelse((data$delta1==1)&(data$delta2==0),4,NA))))
# (4) Set up criteria for X>Y, X<Y, and X><Y
# Pairwise comparison between all observations from treatment group and control group.
# Seperate patients in treatment group and control group into two data sets.
data_c<-data[which(data$treatment==0),]
data_t<-data[which(data$treatment==1),]
# (5) compare control and treatment observations to determine win or loss.
# --------- Treatment individual wins:
# condition (1)
cond1<-cond1(cat_c=data_c$cat, y2_c=data_c$y2, y2_t=data_t$y2)
# each column is each obs in control group; each row is each obs in treatment group;
# condition (3)
# (3)_1
cond3_1<-cond3_1(cat_c=data_c$cat, y2_c=data_c$y2, y2_t=data_t$y2, y1_c=data_c$y1, y1_t=data_t$y1)
# (3)_2
cond3_2<-cond3_2(cat_c=data_c$cat, cat_t=data_t$cat, y2_c=data_c$y2, y2_t=data_t$y2, y1_c=data_c$y1, y1_t=data_t$y1)
cond3<-cond3_1+cond3_2
win_t<-cond1+cond3
# --------- Control individual wins:
# condition (2)
cond2<-cond2(cat_t=data_t$cat, y2_c=data_c$y2, y2_t=data_t$y2)
# each column is each obs in control group; each row is each obs in treatment group;
# condition (4)
# (4)_1
cond4_1<-cond4_1(cat_t=data_t$cat, y2_c=data_c$y2, y2_t=data_t$y2, y1_c=data_c$y1, y1_t=data_t$y1)
# (4)_2
cond4_2<-cond4_2(cat_c=data_c$cat, cat_t=data_t$cat, y2_c=data_c$y2, y2_t=data_t$y2, y1_c=data_c$y1, y1_t=data_t$y1)
cond4<-cond4_1+cond4_2
win_c<-cond2+cond4
# (3) Calculate U1, U2.
U1<-sum(win_t) / (m*n)
U2<-sum(win_c) / (m*n)
# (4) Calculate theta1_hat, theta2_hat.
theta1<-mean(win_t)
theta2<-mean(win_c)
# (5) Calculate Var(U1), Var(U2);
# Kernel 1----------------------------------------------------------;
### Calculate Var(phi1(x));
# Comparison of same x with different y from different clusters.
xGTy12.out<-x12GTy12_Rcpp_test(t(win_t),m, n, Nj_t, Nj_c)
var.phi1x<-(xGTy12.out/xy12.den) - theta1^2
rm(xGTy12.out)
### Calculate Var(phi1(y));
# Comparison of same y with different x from different clusters.
x12GTy.out<-x12GTy12_Rcpp_test(win_t, n, m, Nj_c, Nj_t)
var.phi1y<-(x12GTy.out/x12y.den) - theta1^2
rm(x12GTy.out)
### Calculate Cov(phi1(x1), phi1(x2));
# Comparison of different x from the same cluster with different y from different clusters.
if (sum(Nj_t)==length(Nj_t)){
cov.phi1xphix2<-0
}else{
xi12GTy12.out<-xi12GTy12_Rcpp_test(t(win_t),m, n, Nj_t, Nj_c)
cov.phi1xphix2<- (xi12GTy12.out/xi12GTy12.den) - theta1^2
rm(xi12GTy12.out)
}
### Calculate Cov(phi1(y1), phi1(y2));
# Comparison of different y from the same cluster with different x from different clusters.
if (sum(Nj_t)==length(Nj_t)){
cov.phi1yphiy2<-0
}else{
x12GTyi12.out<-xi12GTy12_Rcpp_test(win_t, n, m, Nj_c, Nj_t)
cov.phi1yphiy2<- (x12GTyi12.out/x12GTyi12.den) - theta1^2
rm(x12GTyi12.out)
}
# Therefore, the Var(U1):
var.U1<-((Ln_bar^2)/m)*(Jm_bar*var.phi1x+Jm_bar2*cov.phi1xphix2) + ((Jm_bar^2)/n)*(Ln_bar*var.phi1y+Ln_bar2*cov.phi1yphiy2)
# For kernel 2----------------------------------------------------------;
### Calculate Var(phi2(x));
# Comparison of same x with different y from different clusters.
xGTy12.out2<-x12GTy12_Rcpp_test(t(win_c),m, n, Nj_t, Nj_c)
var.phi2x<-(xGTy12.out2/xy12.den) - theta2^2
rm(xGTy12.out2)
### Calculate Var(phi2(y));
# Comparison of same y with different x from different clusters.
x12GTy.out2<-x12GTy12_Rcpp_test(win_c, n, m, Nj_c, Nj_t)
var.phi2y<-(x12GTy.out2/x12y.den) - theta2^2
rm(x12GTy.out2)
### Calculate Cov(phi2(x1), phi2(x2));
# Comparison of different x from the same cluster with different y from different clusters.
if (sum(Nj_t)==length(Nj_t)){
cov.phi1xphix22<-0
}else{
xi12GTy12.out2<-xi12GTy12_Rcpp_test(t(win_c),m, n, Nj_t, Nj_c)
cov.phi1xphix22<- (xi12GTy12.out2/xi12GTy12.den) - theta2^2
rm(xi12GTy12.out2)
}
### Calculate Cov(phi2(y1), phi2(y2));
# Comparison of different y from the same cluster with different x from different clusters.
if (sum(Nj_t)==length(Nj_t)){
cov.phi1yphiy22<-0
}else{
x12GTyi12.out2<-xi12GTy12_Rcpp_test(win_c, n, m, Nj_c, Nj_t)
cov.phi1yphiy22<- (x12GTyi12.out2/x12GTyi12.den) - theta2^2
rm(x12GTyi12.out2)
}
# Therefore, the Var(U2):
var.U2<-((Ln_bar^2)/m)*(Jm_bar*var.phi2x+Jm_bar2*cov.phi1xphix22) + ((Jm_bar^2)/n)*(Ln_bar*var.phi2y+Ln_bar2*cov.phi1yphiy22)
# For Cov (U1, U2)----------------------------------------------------------;
### Calculate Cov(phi1(x), phi2(x));
# Comparison of same x with different y from different clusters between kernels.
covxGTy12.out<-covx12GTy12_Rcpp_test(t(win_t), t(win_c),m, n, Nj_t, Nj_c)
cov.phi1xphi2x<-(covxGTy12.out/(xy12.den)) - theta1*theta2
rm(covxGTy12.out)
### Calculate Cov(phi1(y), phi2(y));
# Comparison of same y with different x from different clusters between kernels.
covx12GTy.out<-covx12GTy12_Rcpp_test(win_c, win_t, n, m, Nj_c, Nj_t)
cov.phi1yphi2y<-(covx12GTy.out/(x12y.den)) - theta1*theta2
rm(covx12GTy.out)
### Calculate Cov(phi1(x1), phi2(x2));
if (sum(Nj_t)==length(Nj_t)){
cov.phi1x1phi2x2<-0
}else{
covxi12GTy12.out<-covxi12GTy12_Rcpp_test(t(win_t), t(win_c),m, n, Nj_t, Nj_c)
cov.phi1x1phi2x2<-covxi12GTy12.out/(xi12GTy12.den) - theta1*theta2
rm(covxi12GTy12.out)
}
### Calculate Cov(phi1(y1), phi2(y2));
if (sum(Nj_t)==length(Nj_t)){
cov.phi1y1phi2y2<-0
}else{
covx12GTyi12.out<-covxi12GTy12_Rcpp_test(win_t, win_c, n, m, Nj_c, Nj_t)
cov.phi1y1phi2y2<-covx12GTyi12.out/(x12GTyi12.den) - theta1*theta2
rm(covx12GTyi12.out)
}
# Therefore, the Cov(U1, U2):
cov.U1U2<-((Ln_bar^2)/m)*(Jm_bar*cov.phi1xphi2x+Jm_bar2*cov.phi1x1phi2x2)+((Jm_bar^2)/n)*(Ln_bar*cov.phi1yphi2y+Ln_bar2*cov.phi1y1phi2y2)
# (6) Use Delta Method to do hypothesis test under null.
test_stat<-(log(U1/U2)-log(null.WR)) / sqrt(var.U1/(U1^2) - 2*cov.U1U2/(U1*U2) + var.U2/(U2^2))
# store estimates
# estimated log(WR)
logWR<-log(U1/U2)
# estimated variance of log(WR)
EV<-var.U1/(U1^2) - 2*cov.U1U2/(U1*U2) + var.U2/(U2^2)
se<-sqrt(EV)
# 95% CI
ci<-paste("(",round(logWR-1.96*se,3),",",round(logWR+1.96*se,3),")", sep = "")
# calculate P-value
p_value<-(1-pnorm(test_stat))*2
# variance covariance matrix of U1 and U2
var_cov<-matrix(c(var.U1, rep(cov.U1U2,2), var.U2),2,2)
if(sum(Nj_t)==length(Nj_t)){
title<-"Test of two composite endpoints for independent survival data"
}else{
title<-"Test of two composite endponits for clustered survival data"
}
output<-list(title, U1, U2, logWR, se, test_stat, ci, p_value, var_cov)
names(output)<-c("name","U1", "U2", "logWR", "se", "z", "ci", "p", "var_cov")
return(output)
}
WR.causal<-function(treatment, cluster, y1, y2, delta1, delta2, x.con=NA, x.char=NA, null.WR=1, alpha.sig=0.05, control=NA, n.boot=200)
{
#----- Starting the function ... -------------------------------------------------
library(fastDummies)
library(lme4)
library(reshape2)
library(BB)
library(nleqslv)
library(MASS)
ll<-list(treatment, cluster, y1, y2, delta1, delta2)
# ---------------- Warning Message: ---------------------------------
# check if all vectors input have the same length
llen1<-sapply(ll, function(x) length(x))
if(sum(llen1)!=length(treatment)*length(llen1)) stop("Error: Input arguements 'treatment','cluster','y1','y2','delta1', and 'delta2' have to be numeric vectors with the same length")
# check if all vectors input are numeric vectors
lna<-sapply(ll, function(x) is.numeric(x))
if(!(all(lna==T))) stop("Error: Input arguements 'treatment','cluster','y1','y2','delta1', and 'delta2' have to be numeric vectors")
# check if treatment is coded as 0 or 1
if(!(all(treatment%in%c(0,1)))) stop("Error: Treatment status has to be coded as 0 (control) or 1 (treatment)")
# check if cluster ID is a positive interger vector
if(!((all(cluster == floor(cluster)))&(all(cluster>0))))stop("Error: Cluster ID has to be positive integer numbers")
# check if all event times are all non-negative
if(!all(y1>=0)) stop("Error: All time to non-fatal event have to be non-negative")
if(!all(y2>=0)) stop("Error: All time to fatal event have to be non-negative")
# check if delta1 and delta2 are coded as 0 and 1
if(!all(delta1%in%c(0,1))) stop("Error: Vector delta1 can only contain 0 or 1")
if(!all(delta2%in%c(0,1))) stop("Error: Vector delta2 can only contain 0 or 1")
# at least one of the covariate matrix should be specified
if((sum(is.na(x.con))!=0)&(sum(is.na(x.char))!=0)) stop("Error: At least one covariate matrix without any missing should be specified: x.con or x.char.")
if(sum(is.na(x.char))==0){
# check if the covariate matrix is type matrix
if(!is.matrix(x.char)) stop("Error: Input argument 'x.char' must be a matrix")
# check if the categorical covariate matrix has characteristic type in each column
if(!all(apply(x.char, 2, function(x) is.character(x))==TRUE)) stop("Error: Input argument 'x.char' must be a character matrix")
# check if all covariate matrix have the same number of rows
llen<-c(llen1, nrow(x.char))
if(sum(llen)!=length(treatment)*length(llen)) stop("Error: The row number of the input arguements 'x.char' has to be the same as the length of 'treatment','cluster','y1','y2','delta1' or 'delta2' ")
if(sum(is.na(x.con))==0){
# check if the covariate matrix is type matrix
if(!is.matrix(x.con)) stop("Error: Input argument 'x.con' must be a matrix")
# check if the continuous covariate matrix has numeric type in each column
if(!all(apply(x.con, 2, function(x) is.numeric(x))==TRUE)) stop("Error: Input argument 'x.con' must be a numeric matrix")
# check if all covariate matrix have the same number of rows
llen<-c(llen1, nrow(x.con))
if(sum(llen)!=length(treatment)*length(llen)) stop("Error: The row number of the input arguements 'x.con' has to be the same as the length of 'treatment','cluster','y1','y2','delta1' or 'delta2' ")
}
}else if(sum(is.na(x.con))==0){
# check if the covariate matrix is type matrix
if(!is.matrix(x.con)) stop("Error: Input argument 'x.con' must be a matrix")
# check if the continuous covariate matrix has numeric type in each column
if(!all(apply(x.con, 2, function(x) is.numeric(x))==TRUE)) stop("Error: Input argument 'x.con' must be a numeric matrix")
# check if all covariate matrix have the same number of rows
llen<-c(llen1, nrow(x.con))
if(sum(llen)!=length(treatment)*length(llen)) stop("Error: The row number of the input arguements 'x.con' has to be the same as the length of 'treatment','cluster','y1','y2','delta1' or 'delta2' ")
}
#--------------------------------------------------------------------------
#---------- Analysis ----------------
if(sum(is.na(x.char))==0){
# define covariate matrix column names
x.char.name<-paste0("char", 1:ncol(x.char))
colnames(x.char)<-x.char.name
# convert character variables to factor variable
for(yy in 1:length(x.char.name)){
x.char[,x.char.name[yy]]<-as.factor(x.char[,x.char.name[yy]])
}
# create dummy variables for character variables
dummy<-dummy_cols(x.char[,], remove_first_dummy = TRUE)
dummy<-dummy[,-which(colnames(dummy)%in%x.char.name)]
colnames(x.char)<-x.char.name<-colnames(dummy)
x.char<-dummy
if(sum(is.na(x.con))==0){
data.cov<-data.frame(x.con, x.char)
x.con.name<-paste0("con", 1:ncol(x.con))
colnames(data.cov)<-c(x.con.name, x.char.name)
}else{
data.cov<-data.frame(x.char)
colnames(data.cov)<-c(x.char.name)
}
}else if(sum(is.na(x.con))==0){
data.cov<-data.frame(x.con)
x.con.name<-paste0("con", 1:ncol(x.con))
colnames(data.cov)<-c(x.con.name)
}
data<-data.frame(treatment, cluster, y1, y2, delta1, delta2, data.cov)
# check missingness
# delete any missing observations
if(!all(complete.cases(data)==TRUE)){
data<-data[complete.cases(data),]
print(paste0("Warning: Missing observations are deleted, and resulting ", nrow(data), " observations in the analysis"))
}
# define basic quantities
N<-nrow(data) # total # subjects
list.cluster<-unique(data$cluster) # list of cluster ID
n.cluster<-length(unique(data$cluster)) # total # clusters
ind_logWR<-log(null.WR) # null log WR.
n<-table(data$treatment)[1] # # control patients
m<-table(data$treatment)[2] # # trt patients
assign.percent<-m/N # % trt assignment
x=colnames(data.cov)
# ---- Pairwise comparisons --------------
# Add comparison scenarios for each observation.
data$cat<-ifelse((data$delta1==1)&(data$delta2==1), 1,
ifelse((data$delta1==0)&(data$delta2==1),2,
ifelse((data$delta1==0)&(data$delta2==0),3,
ifelse((data$delta1==1)&(data$delta2==0),4,NA))))
# (4) Set up criteria for X>Y, X<Y, and X><Y
# Pairwise comparison between all observations from treatment group and control group.
# Seperate patients in treatment group and control group into two data sets.
data_c<-data[which(data$treatment==0),]
data_t<-data[which(data$treatment==1),]
# (5) compare control and treatment observations to determine win or loss.
comp<-PairwiseComp_Obs(data_c=data_c, data_t=data_t)
win_t<-comp[[1]]
win_c<-comp[[2]]
#-----------------------------------------------------------------------------------
# analysis
if(length(unique(cluster))==nrow(data)){
title<-"Causal inference of two composite endpoints for independent survival data"
########## For independent subjects without clustering ######################
# fit PS model
# create formula for later use
fmla<-formula(paste0("treatment~", paste0(x, collapse = "+")))
# calculate the propensity score (correctly specified)
fit<-glm(fmla, family=binomial(link='logit'), data=data)
phi<-summary(fit)$coeff[,1] # PS model coefficient: intercept, z1, z2
dm<-as.matrix(cbind(rep(1, N),data[,x]))
est.p<-exp(dm%*%phi)/(1+exp(dm%*%phi))
data$ps<-est.p
# separate treatment and control ps
ps_c<-data[which(data$treatment==0),"ps"]
ps_t<-data[which(data$treatment==1),"ps"]
#----------------------- Point Estimation -------------------------------------
# create matrix of control and PS
ps_cMatrix<-matrix(rep(ps_c, m), nrow=m, byrow = T)
ps_tMatrix<-matrix(rep(ps_t, n), nrow=m)
# calculate IPW estimator U1 and U2
# First kernel: phi_1(X>Y)
U1_IPW1<-0.5*(1/choose(m+n,2))*sum(win_t/((1-ps_cMatrix)*ps_tMatrix))
# Second kernel: phi_1(X<Y)
U2_IPW1<-0.5*(1/choose(m+n,2))*sum(win_c/((1-ps_cMatrix)*ps_tMatrix))
# log(WR)
logWR<-log(U1_IPW1/U2_IPW1)
#----------------------- Variance Estimation -------------------------------------
# ----- Influence function of U1_IPW
h0_Y<-apply(win_t, 2, mean)
h1_Y<-apply(win_t, 1, mean)
g1_IP<-c(h1_Y/ps_t,h0_Y/(1-ps_c))*0.5
first<-2*(g1_IP-U1_IPW1)
# pass in covariate matrix: first column - treatment indicator (1=trt, 0=control)
covMatrix<-as.matrix(data[, c("treatment",x)])
h<<-c(win_t)
Ani<-An(phi, covMatrix, n, m, h)
An.f<-apply(attr(Ani,"gradient"), 2, sum)/choose(m+n,2)
# calculate the influence function of the PS model: logistic regression: l_tilt
covMatrix<-as.matrix(data[, x])
yi<<-data[, "treatment"]
# ---- first derivative of log likelihood function
li<-attr(l_tilt(phi, covMatrix), "gradient")
# attr(, "gradient") contains all the values of first derivative of log likelihood for each sample
# ---- information matrix
ii<-attr(l_tilt(phi, covMatrix), "hessian")
# attr(, "hessian") contains all the values of information matrix of log likelihood for each sample. It's 3-dimensional
l_tilt.f<-sapply(1:(n+m), function(x) ginv(ii[x,,])%*%li[x,])
second<-c(t(An.f)%*%l_tilt.f)
# influence function of U1_IPW
inf.U1_IPW<-first+second
var.U1_IPW<-mean(inf.U1_IPW^2)/(n+m)
# ----- Influence function of U2_IPW
h0_Y<-apply(win_c, 2, mean)
h1_Y<-apply(win_c, 1, mean)
g1_IP<-c(h1_Y/ps_t,h0_Y/(1-ps_c))*0.5
first<-2*(g1_IP-U1_IPW1)
# pass in covariate matrix: first column - treatment indicator (1=trt, 0=control)
covMatrix<-as.matrix(data[, c("treatment",x)])
h<<-c(win_c)
Ani<-An(phi, covMatrix, n, m, h)
An.f<-apply(attr(Ani,"gradient"), 2, sum)/choose(m+n,2)
# calculate the influence function of the PS model: logistic regression: l_tilt
covMatrix<-as.matrix(data[,x])
yi<<-data[, "treatment"]
# ---- first derivative of log likelihood function
li<-attr(l_tilt(phi, covMatrix), "gradient")
# attr(, "gradient") contains all the values of first derivative of log likelihood for each sample
# ---- information matrix
ii<-attr(l_tilt(phi, covMatrix), "hessian")
# attr(, "hessian") contains all the values of information matrix of log likelihood for each sample. It's 3-dimensional
l_tilt.f<-sapply(1:(n+m), function(x) ginv(ii[x,,])%*%li[x,])
second<-c(t(An.f)%*%l_tilt.f)
# influence function of U2_IPW
inf.U2_IPW<-first+second
var.U2_IPW<-mean(inf.U2_IPW^2)/(n+m)
### Therefore, the estimated asymtotic variance of estimated log(WR) is
se<-sqrt(mean((inf.U1_IPW/U1_IPW1-inf.U2_IPW/U2_IPW1)^2)/(n+m))
test_stat<-(logWR-ind_logWR)/se
U1<-U1_IPW1
U2<-U2_IPW1
lam.out<-NA
# data info
info<-paste0("Total ", N, " subjects. Control patients = ", n, " treatment patients = ", m)
list.cluster<-NA
}else{
title<-"Causal inference of two composite endpoints for cluster-dependent survival data"
########## For cluster-dependent subjects with clustering ######################
#--------------- exclude clusters that have only one membership
ti<-table(data$treatment, data$cluster)
tt<-t(ti)
tt[tt==0]<-NA
tt<-as.numeric(colnames(ti)[complete.cases(tt)])
data3<-data[which(data$cluster%in%tt),] # data3 is the data set with all clusters have both memberships
N<-nrow(data3) # total # subjects
list.cluster<-unique(data3$cluster) # list of selected cluster ID
n.cluster<-length(unique(data3$cluster)) # total # clusters
n<-table(data3$treatment)[1] # # control patients
m<-table(data3$treatment)[2] # # trt patients
# rank the data3 by cluster ID and by treatment
data3<-data3[order(data3$cluster, data3$treatment),]
# re-name cluster ID to make it have consecutive cluster ID
for(j in 1:length(unique(data3$cluster))){
data3[which(data3$cluster==unique(data3$cluster)[j]), "cluster"]<-j
}
#------------------- Estimate PS
# create formula for later use
fmla<-formula(paste0("treatment~", paste0(x, collapse = "+")))
# fit PS model, without cluster effect: standard logistic regression with iid r.v.
fit<-glm(fmla, family=binomial(link='logit'), data=data3)
phi<-summary(fit)$coeff[,1] # PS model coefficient: intercept, z1, z2
dm<-as.matrix(cbind(rep(1, nrow(data3)),data3[,x]))
est.p<-exp(dm%*%phi)/(1+exp(dm%*%phi))
data3$raw.ps<-est.p
# estimate PS using calibration technique.
fmla.boot<-formula(paste0("treatment~0+", paste0(x, collapse = "+")))
fit.boot<-glm(fmla.boot, family=binomial(link='logit'), data=data3)
lam1<-summary(fit.boot)$coeff[,1]
lam<-rep(lam1, 2)
#lam<-BBsolve(par=lam, data=data3, x=x, fn=calibrationPS_lambda, control = list(maxit=maxit, tol=tol))$par
if(sum(is.na(control))){
lam.out<-nleqslv(lam, fn=calibrationPS_lambda,data=data3, x.cov=x)
}else{
lam.out<-nleqslv(lam, control=control, fn=calibrationPS_lambda,data=data3, x.cov=x)
}
lam<-lam.out$x
# print convergence status
if(lam.out$termcd==1){
print("Successful Convergence.")
}else if(lam.out$termcd==2){
print("Lambda values within tolerance.")
}else{
print("Unsuccessful Convergence. Check '$convergence' and R package 'nleqslv' for details.")
}
# calculate the new estimated PS
cal<-calibrationPS(lam=lam, data=data3, x.cov=x)
data3$cal.weight<-cal[[1]]
data3$cal.ps<-cal[[2]]
#--------------- Outcome model: stratification
# subset data by clusters
aa<-split.data.frame(data3, data3$cluster)
# calculate cluster specific estimates
U1<-rep(NA, length(aa))
U2<-rep(NA, length(aa))
tw<-rep(NA, length(aa)) # total weight in each cluster
for(i in 1:length(aa)){
# data set for cluster i
temp<-aa[[i]]
# re-calculate pairwise comparison of win_t and win_c
temp_c<-temp[which(temp$treatment==0),]
temp_t<-temp[which(temp$treatment==1),]
comp<-PairwiseComp_Obs(data_c=temp_c, data_t=temp_t)
temp.win_t<-comp[[1]]
temp.win_c<-comp[[2]]
# separate treatment and control weight
w_c<-temp[which(temp$treatment==0),"cal.weight"]
w_t<-temp[which(temp$treatment==1),"cal.weight"]
temp.m<-length(w_t)
temp.n<-length(w_c)
#----------------------- Point Estimation -------------------------------------
# create matrix of control and treatment weight
w_cMatrix<-matrix(rep(w_c, temp.m), nrow=temp.m, byrow = T)
w_tMatrix<-matrix(rep(w_t, temp.n), nrow=temp.m)
# calculate IPW estimator U1 and U2
# First kernel: phi_1(X>Y)
U1[i]<-0.5*(1/choose(nrow(temp),2))*sum(temp.win_t*(w_cMatrix*w_tMatrix))
# Second kernel: phi_1(X<Y)
U2[i]<-0.5*(1/choose(nrow(temp),2))*sum(temp.win_c*(w_cMatrix*w_tMatrix))
# calculate weight for each observation
tw[i]<-sum(temp$cal.weight)
}
# calculate the clustered estimator
U1_IPW5<-sum(tw*U1)/sum(tw)
U2_IPW5<-sum(tw*U2)/sum(tw)
logWR<-log(U1_IPW5/U2_IPW5)
#--- Variance estimation using Bootstrap -----
print("Estimating variance using bootstrap on clusters ...")
boot.logWR5<-rep(NA, n.boot)
for(b in 1:n.boot){
ind.cluster<-sample(unique(data$cluster), n.cluster, replace=T)
for(i in 1:length(ind.cluster)){
temp<-data[data$cluster==ind.cluster[i],]
temp$cluster<-i
if(i==1){
bootdata<-temp
}else{
bootdata<-rbind(bootdata, temp)
}
}
dc<-bootdata[which(bootdata$treatment==0),]
dt<-bootdata[which(bootdata$treatment==1),]
boot.N<-nrow(bootdata)
boot.m<-nrow(dt)
boot.n<-nrow(dc)
#--------------- exclude clusters that have only one membership
ti<-table(bootdata$treatment, bootdata$cluster)
tt<-t(ti)
tt[tt==0]<-NA
tt<-as.numeric(colnames(ti)[complete.cases(tt)])
data3<-bootdata[which(bootdata$cluster%in%tt),] # data3 is the data set with all clusters have both memberships
# rank the data3 by cluster ID and by treatment
data3<-data3[order(data3$cluster, data3$treatment),]
# re-name cluster ID to make it have consecutive cluster ID
for(j in 1:length(unique(data3$cluster))){
data3[which(data3$cluster==unique(data3$cluster)[j]), "cluster"]<-j
}
#------------------- Estimate PS
# fit PS model, without cluster effect: standard logistic regression with iid r.v.
fit<-glm(fmla, family=binomial(link='logit'), data=data3)
phi<-summary(fit)$coeff[,1] # PS model coefficient: intercept, z1, z2
dm<-as.matrix(cbind(rep(1, nrow(data3)),data3[,x]))
est.p<-exp(dm%*%phi)/(1+exp(dm%*%phi))
data3$raw.ps<-est.p
# calculate initial values separately in treatment and control group
fmla.boot<-formula(paste0("treatment~0+", paste0(x, collapse = "+")))
fit.boot<-glm(fmla.boot, family=binomial(link='logit'), data=data3)
lam1<-summary(fit.boot)$coeff[,1]
lam<-rep(lam1, 2)
#lam<-BBsolve(par=lam, data=data3, x=x, fn=calibrationPS_lambda, control = list(maxit=maxit, tol=tol))$par
if(sum(is.na(control))){
lam.out.boot<-nleqslv(lam, fn=calibrationPS_lambda,data=data3, x.cov=x)
}else{
lam.out.boot<-nleqslv(lam, control=control, fn=calibrationPS_lambda,data=data3, x.cov=x)
}
lam<-lam.out.boot$x
# # print convergence status
# if(lam.out$termcd==1){
# print("Successful Convergence.")
# }else if(lam.out$termcd==2){
# print("Lambda values within tolerance.")
# }else{
# print("Unsuccessful Convergence. Check '$convergence' and R package 'nleqslv' for details.")
# }
# calculate the new estimated PS
cal<-calibrationPS(lam=lam, data=data3, x=x)
data3$cal.weight<-cal[[1]]
data3$cal.ps<-cal[[2]]
#--------------- Outcome model: stratification
# subset data by clusters
aa<-split.data.frame(data3, data3$cluster)
# calculate cluster specific estimates
U1<-rep(NA, length(aa))
U2<-rep(NA, length(aa))
tw<-rep(NA, length(aa)) # total weight in each cluster
for(i in 1:length(aa)){
# data set for cluster i
temp<-aa[[i]]
# re-calculate pairwise comparison of win_t and win_c
temp_c<-temp[which(temp$treatment==0),]
temp_t<-temp[which(temp$treatment==1),]
comp<-PairwiseComp_Obs(data_c=temp_c, data_t=temp_t)
temp.win_t<-comp[[1]]
temp.win_c<-comp[[2]]
# separate treatment and control weight
w_c<-temp[which(temp$treatment==0),"cal.weight"]
w_t<-temp[which(temp$treatment==1),"cal.weight"]
temp.m<-length(w_t)
temp.n<-length(w_c)
#----------------------- Point Estimation -------------------------------------
# create matrix of control and treatment weight
w_cMatrix<-matrix(rep(w_c, temp.m), nrow=temp.m, byrow = T)
w_tMatrix<-matrix(rep(w_t, temp.n), nrow=temp.m)
# calculate IPW estimator U1 and U2
# First kernel: phi_1(X>Y)
U1[i]<-0.5*(1/choose(nrow(temp),2))*sum(temp.win_t*(w_cMatrix*w_tMatrix))
# Second kernel: phi_1(X<Y)
U2[i]<-0.5*(1/choose(nrow(temp),2))*sum(temp.win_c*(w_cMatrix*w_tMatrix))
# calculate weight for each observation
tw[i]<-sum(temp$cal.weight)
}
# calculate the clustered estimator
boot.U1<-sum(tw*U1)/sum(tw)
boot.U2<-sum(tw*U2)/sum(tw)
boot.logWR5[b]<-log(boot.U1/boot.U2)
}
# bootstrap SE
se<-sqrt(var(boot.logWR5))
test_stat<-(logWR-ind_logWR)/se
indicator<-ifelse(abs(test_stat)>qnorm(1-alpha.sig/2), 1, 0)
U1<-U1_IPW5
U2<-U2_IPW5
# data info
info<-paste0("Total ", N, " subjects with ", n.cluster, " clusters. Total control patients = ", n, " total treatment patients = ", m)
}
# 95% CI
ci<-paste("(",round(logWR-1.96*se,3),",",round(logWR+1.96*se,3),")", sep = "")
# calculate P-value
p_value<-(1-pnorm(abs(test_stat)))*2
output<-list(title, info, list.cluster, U1, U2, logWR, se, test_stat, ci, p_value, lam.out)
names(output)<-c("name","info", "clusters","U1", "U2", "logWR", "se", "z", "ci", "p-value", "convergence")
return(output)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.