tests/testthat/helpers-logrank.R

###### Author: Ting Ye ######
###### Last modified: Jan 16, 2022 ######
# Use null variance #
data.sort<-function(data.simu){
  n<-dim(data.simu)[1]
  data.simu.order<- data.simu[order(data.simu$t),]
  data.rev<- data.frame(data.simu.order, Y=n:1, Y1=cumsum(data.simu.order$I1[n:1])[n:1], Y0=cumsum(data.simu.order$I0[n:1])[n:1])
  return(data.rev)
}

wlogrank<-function(data.simu){
  n<-dim(data.simu)[1]
  data.rev<-data.sort(data.simu)
  T.seq<-data.rev$t
  T.rep<-c(0,T.seq)[1:n]
  T.diff<-T.seq-T.rep
  same.ind<-which(T.diff==0)
  n_col<-dim(data.rev)[2]
  for(ind in same.ind){
    data.rev[ind,(n_col-2):n_col]<-data.rev[ind-1,(n_col-2):n_col]
  }
  S.alt<-sum(data.rev$delta*(data.rev$I1*data.rev$Y0-data.rev$I0*data.rev$Y1)/data.rev$Y)
  var.alt<-sum(data.rev$delta*data.rev$Y0*data.rev$Y1/data.rev$Y^2)
  res<-S.alt/sqrt(var.alt)
  return(list(S.alt=S.alt, res=res, var.alt=var.alt))
}

ind_to_factor<-function(data.simu){
  z<-data.simu[,grepl("car_strata",names(data.simu))]
  n_strata<-dim(z)[2]
  n<-dim(z)[1]
  fz<-numeric(n)
  for(i in 1:n_strata){
    fz<-fz+z[,i]*i
  }
  return(as.factor(fz))
}

wlogrank_var_calibrated<-function(data.simu,randomization,p_trt){ # try new version 19 May
  n<-dim(data.simu)[1]
  S.alt<-wlogrank(data.simu)$S.alt
  data.rev<-data.sort(data.simu)
  T.seq<-data.rev$t
  T.rep<-c(0,T.seq)[1:n]
  T.diff<-T.seq-T.rep
  same.ind<-which(T.diff==0)
  n_col<-dim(data.rev)[2]
  for(ind in same.ind){
    data.rev[ind,(n_col-2):n_col]<-data.rev[ind-1,(n_col-2):n_col]
  }

  strata_z<-data.rev[,grepl("car_strata",names(data.rev))]
  mean_at_risk<-data.rev$delta/data.rev$Y
  cumsum_at_risk<-cumsum(mean_at_risk)
  mean_at_risk_2<-data.rev$delta*data.rev$Y1/(data.rev$Y)^2
  cumsum_at_risk_2<-cumsum(mean_at_risk_2)
  mu_t<-data.rev$Y1/data.rev$Y
  O_i<-data.rev$delta*(data.rev$I1-data.rev$Y1/data.rev$Y)-data.rev$I1*cumsum_at_risk+cumsum_at_risk_2
  O_i1<-data.rev$I1*O_i
  O_i0<- -data.rev$I0*O_i
  strata_z<-data.rev[,grepl("car_strata",names(data.rev))]
  n_strata<-dim(strata_z)[2]
  z<-strata_z
  cond_var_1<-numeric(n_strata)
  cond_var_0<-numeric(n_strata)
  prob_z<-numeric(n_strata)
  cond_exp_1<-numeric(n_strata)
  cond_exp_0<-numeric(n_strata)
  for(i in 1:n_strata){
    cond_var_1[i]<-var(O_i1[z[,i]==1 & data.rev$I1==1])
    cond_var_0[i]<-var(O_i0[z[,i]==1 & data.rev$I0==1])
    prob_z[i]<-mean(z[,i])
    cond_exp_1[i]<-mean(O_i1[z[,i]==1 & data.rev$I1==1])
    cond_exp_0[i]<-mean(O_i0[z[,i]==1 & data.rev$I0==1])
  }
  na.ind<-is.na(cond_var_1+cond_var_0)
  if(length(which(na.ind))!=0) warning("conditional variance in log-rank equals 0")
  cond_var_1<-cond_var_1[!na.ind]
  cond_var_0<-cond_var_0[!na.ind]
  prob_z<-prob_z[!na.ind]
  cond_exp_1<-cond_exp_1[!na.ind]
  cond_exp_0<-cond_exp_0[!na.ind]
  sai_1<-(p_trt*(cond_var_1%*%prob_z)+(1-p_trt)*(cond_var_0%*%prob_z))
  sai_2<-0 # for cabc and permuted_block
  if(randomization=="SR"){
    sai_2<-p_trt*(1-p_trt)*sum((cond_exp_0+cond_exp_1)^2 * prob_z)
  }
  if(randomization =="urn"){
    if(p_trt!=1/2)  stop("For now, urn design is only designed for pi=1/2")
    sai_2<-1/12*sum((cond_exp_0+cond_exp_1)^2 * prob_z)
  }
  var_cal<-n*(sai_1+sai_2)
  T_cal<-S.alt/sqrt(var_cal)
  return(list(S.alt=S.alt, res=T_cal,var_cal=var_cal))
}

# # proposed T_RS in Ye and Shao, 2020, jrssb.
# score_robust<-function(data.simu,randomization,p_trt){
#   data.rev<-data.sort(data.simu)
#   x<-data.rev[,grepl("model",names(data.rev))] #only those has model
#   x<-cbind(x,data.rev[,grepl("car_strata",names(data.rev))][,-1])
#   n<-dim(data.rev)[1]
#   data_sub<-data.frame(t=data.rev$t,delta=data.rev$delta,x)
#   fit<-survival::coxph(survival::Surv(t,delta)~.,data=data_sub)
#   S_0_seq<-exp(fit$coefficients %*% t(x))
#   S_1_seq<-exp(fit$coefficients %*% t(x))*data.rev$I1
#   S_0<-cumsum(S_0_seq[n:1])[n:1]/n
#   S_1<-cumsum(S_1_seq[n:1])[n:1]/n
#   U<-sum(data.rev$delta*(data.rev$I1-S_1/S_0))
#   #mu_t<-data.rev$Y1/data.rev$Y # two choices, both valid
#   mu_t<-S_1/S_0
#   mean_at_risk<-data.rev$delta/(n*S_0)
#   cumsum_at_risk<-cumsum(mean_at_risk)
#   mean_at_risk_2<-data.rev$delta/(n*S_0)*mu_t
#   cumsum_at_risk_2<-cumsum(mean_at_risk_2)
#   O_i<-data.rev$delta*(data.rev$I1-mu_t)-S_1_seq*cumsum_at_risk+S_0_seq*cumsum_at_risk_2
#
#   O_i1<-data.rev$I1*O_i
#   O_i0<- -data.rev$I0*O_i
#
#   strata_z<-data.rev[,grepl("car_strata",names(data.rev))]
#   n_strata<-dim(strata_z)[2]
#   z<-strata_z
#   cond_var_1<-numeric(n_strata)
#   cond_var_0<-numeric(n_strata)
#   prob_z<-numeric(n_strata)
#   cond_exp_1<-numeric(n_strata)
#   cond_exp_0<-numeric(n_strata)
#   for(i in 1:n_strata){
#     cond_var_1[i]<-var(O_i1[z[,i]==1 & data.rev$I1==1])
#     cond_var_0[i]<-var(O_i0[z[,i]==1 & data.rev$I0==1])
#     prob_z[i]<-mean(z[,i])
#     cond_exp_1[i]<-mean(O_i1[z[,i]==1 & data.rev$I1==1])
#     cond_exp_0[i]<-mean(O_i0[z[,i]==1 & data.rev$I0==1])
#   }
#   na.ind<-is.na(cond_var_1+cond_var_0)
#   if(length(which(na.ind))!=0) warning("conditional variance in log-rank equals 0")
#   cond_var_1<-cond_var_1[!na.ind]
#   cond_var_0<-cond_var_0[!na.ind]
#   prob_z<-prob_z[!na.ind]
#   cond_exp_1<-cond_exp_1[!na.ind]
#   cond_exp_0<-cond_exp_0[!na.ind]
#   sai_1<-(p_trt*(cond_var_1%*%prob_z)+(1-p_trt)*(cond_var_0%*%prob_z))
#   sai_2<-0 # for cabc and permuted_block
#   if(randomization=="SR"){
#     sai_2<-p_trt*(1-p_trt)*sum((cond_exp_0+cond_exp_1)^2 * prob_z)
#   }
#   if(randomization =="urn"){
#     if(p_trt!=1/2)  stop("For now, urn design is only designed for pi=1/2")
#     sai_2<-1/12*sum((cond_exp_0+cond_exp_1)^2 * prob_z)
#   }
#   var_cal<-n*(sai_1+sai_2)
#   T_SR<-U/sqrt(var_cal)
#   return(T_SR)
# }


# proposed T_RS in Ye and Shao, 2020, jrssb.
score_robust<-function(data.simu,randomization,p_trt){
  data.rev<-data.sort(data.simu)
  x<-data.rev[,grepl("model",names(data.rev))] #only those has model
  n<-dim(data.rev)[1]
  data_sub<-data.frame(t=data.rev$t,delta=data.rev$delta,x)
  fit<-survival::coxph(survival::Surv(t,delta)~.,data=data_sub)
  S_0_seq<-exp(fit$coefficients %*% t(x))
  S_1_seq<-exp(fit$coefficients %*% t(x))*data.rev$I1
  S_0<-cumsum(S_0_seq[n:1])[n:1]/n
  S_1<-cumsum(S_1_seq[n:1])[n:1]/n
  U<-sum(data.rev$delta*(data.rev$I1-S_1/S_0))
  #mu_t<-data.rev$Y1/data.rev$Y # two choices, both valid
  mu_t<-S_1/S_0
  mean_at_risk<-data.rev$delta/(n*S_0)
  cumsum_at_risk<-cumsum(mean_at_risk)
  mean_at_risk_2<-data.rev$delta/(n*S_0)*mu_t
  cumsum_at_risk_2<-cumsum(mean_at_risk_2)
  O_i<-data.rev$delta*(data.rev$I1-mu_t)-S_1_seq*cumsum_at_risk+S_0_seq*cumsum_at_risk_2

  O_i1<-data.rev$I1*O_i
  O_i0<- -data.rev$I0*O_i

  strata_z<-data.rev[,grepl("car_strata",names(data.rev))]
  n_strata<-dim(strata_z)[2]
  z<-strata_z
  cond_var_1<-numeric(n_strata)
  cond_var_0<-numeric(n_strata)
  prob_z<-numeric(n_strata)
  cond_exp_1<-numeric(n_strata)
  cond_exp_0<-numeric(n_strata)
  for(i in 1:n_strata){
    cond_var_1[i]<-var(O_i1[z[,i]==1 & data.rev$I1==1])
    cond_var_0[i]<-var(O_i0[z[,i]==1 & data.rev$I0==1])
    prob_z[i]<-mean(z[,i])
    cond_exp_1[i]<-mean(O_i1[z[,i]==1 & data.rev$I1==1])
    cond_exp_0[i]<-mean(O_i0[z[,i]==1 & data.rev$I0==1])
  }
  na.ind<-is.na(cond_var_1+cond_var_0)
  if(length(which(na.ind))!=0) warning("conditional variance in log-rank equals 0")
  cond_var_1<-cond_var_1[!na.ind]
  cond_var_0<-cond_var_0[!na.ind]
  prob_z<-prob_z[!na.ind]
  cond_exp_1<-cond_exp_1[!na.ind]
  cond_exp_0<-cond_exp_0[!na.ind]
  sai_1<-(p_trt*(cond_var_1%*%prob_z)+(1-p_trt)*(cond_var_0%*%prob_z))
  sai_2<-0 # for cabc and permuted_block
  if(randomization=="SR"){
    # if(p_trt!=1/2)  stop("For now, SR is only designed for pi=1/2")
    # sai_2<-1/4*sum((cond_exp_0+cond_exp_1)^2 * prob_z)
    sai_2<-p_trt*(1-p_trt)*sum((cond_exp_0+cond_exp_1)^2 * prob_z)
  }
  # if(randomization =="urn"){
  #   if(p_trt!=1/2)  stop("For now, urn design is only designed for pi=1/2")
  #   sai_2<-1/12*sum((cond_exp_0+cond_exp_1)^2 * prob_z)
  # }
  var_cal<-n*(sai_1+sai_2)
  T_SR<-U/sqrt(var_cal)
  return(list(T_SR=T_SR,
              se=sqrt(var_cal),
              U=U))
}


# score test T_S in Ye and Shao, 2020, jrssb.
score<-function(data.simu){
  data.rev<-data.sort(data.simu)
  x<-data.rev[,grepl("model",names(data.rev))] #only those has model
  x<-cbind(x,data.rev[,grepl("car_strata",names(data.rev))][,-1])
  n<-dim(data.rev)[1]
  data_sub<-data.frame(t=data.rev$t,delta=data.rev$delta,x)
  fit<-survival::coxph(survival::Surv(t,delta)~.,data=data_sub)
  S_0_seq<-exp(fit$coefficients %*% t(x))
  S_1_seq<-exp(fit$coefficients %*% t(x))*data.rev$I1
  S_0<-cumsum(S_0_seq[n:1])[n:1]/n
  S_1<-cumsum(S_1_seq[n:1])[n:1]/n
  U<-sum(data.rev$delta*(data.rev$I1-S_1/S_0))
  #mu_t<-data.rev$Y1/data.rev$Y # two choices, both valid
  mu_t<-S_1/S_0
  mean_at_risk<-data.rev$delta/(n*S_0)
  cumsum_at_risk<-cumsum(mean_at_risk)
  mean_at_risk_2<-data.rev$delta/(n*S_0)*mu_t
  cumsum_at_risk_2<-cumsum(mean_at_risk_2)
  O_i<-data.rev$delta*(data.rev$I1-mu_t)-S_1_seq*cumsum_at_risk+S_0_seq*cumsum_at_risk_2
  var_cal<-mean(O_i^2)*n
  T_WR<-U/sqrt(var_cal)
  return(T_WR)
}

# numerator of T_S
score_numerator<-function(data.simu){
  data.rev<-data.sort(data.simu)
  x<-data.rev[,grepl("model",names(data.rev))] #only those has model
  x<-cbind(x,data.rev[,grepl("car_strata",names(data.rev))][,-1])
  n<-dim(data.rev)[1]
  data_sub<-data.frame(t=data.rev$t,delta=data.rev$delta,x)
  fit<-survival::coxph(survival::Surv(t,delta)~.,data=data_sub)
  S_0_seq<-exp(fit$coefficients %*% t(x))
  S_1_seq<-exp(fit$coefficients %*% t(x))*data.rev$I1
  S_0<-cumsum(S_0_seq[n:1])[n:1]/n
  S_1<-cumsum(S_1_seq[n:1])[n:1]/n
  U<-sum(data.rev$delta*(data.rev$I1-S_1/S_0))
  return(U)
}

#########################
#   Added Sep 5, 2021   #
#########################

# Using log-rank score as the derived outcome (Tangen-Koch, 1999)
logrank_TK_score<-function(data.simu,p_trt,randomization){
  n<-dim(data.simu)[1]
  data.simu$fz<-ind_to_factor(data.simu)
  data.rev<-data.sort(data.simu)
  T.seq<-data.rev$t
  T.rep<-c(0,T.seq)[1:n]
  T.diff<-T.seq-T.rep
  same.ind<-which(T.diff==0)
  n_col<-dim(data.rev)[2]
  for(ind in same.ind){
    data.rev[ind,(n_col-2):n_col]<-data.rev[ind-1,(n_col-2):n_col]
  }
  n<-dim(data.rev)[1]
  data.rev$R<-with(data.rev,delta-cumsum(data.rev$delta/data.rev$Y))

  x.model<-as.matrix(data.rev[,grepl("model",names(data.rev)),drop=FALSE]) #only those has model
  x.mat<-model.matrix(~factor(fz)+x.model,data=data.rev)[,-1]
  x.centered<-sweep(x.mat, 2, colMeans(x.mat))
  fit1<-lm(R~factor(fz)+x.model[data.rev$I1==1,],data=data.rev[data.rev$I1==1,])
  fit0<-lm(R~factor(fz)+x.model[data.rev$I1==0,],data=data.rev[data.rev$I1==0,])
  b1<-fit1$coefficients[-1]
  b0<-fit0$coefficients[-1]
  # calculating beta0 and beta1 for variance estimation
  mu_t<-data.rev$Y1/data.rev$Y
  data.rev$O.hat<-data.rev$delta*(data.rev$I1-data.rev$Y1/data.rev$Y)-
    data.rev$I1*cumsum(data.rev$delta/data.rev$Y)+cumsum(data.rev$delta*data.rev$Y1/(data.rev$Y)^2)
  data.rev$O.hat[data.rev$I0==1]<- (-1)*data.rev$O.hat[data.rev$I0==1]
  fit1.Ohat<-lm(O.hat~factor(fz)+x.model[data.rev$I1==1,],data=data.rev[data.rev$I1==1,])
  fit0.Ohat<-lm(O.hat~factor(fz)+x.model[data.rev$I1==0,],data=data.rev[data.rev$I1==0,])
  beta1.Ohat<-fit1.Ohat$coefficients[-1]
  beta0.Ohat<-fit0.Ohat$coefficients[-1]

  ind.na<-which(is.na(b1))
  if(length(ind.na)==0){
    U_TK<-sum(data.rev$I1*(data.rev$R-t((x.centered)%*% b1 )) -
                data.rev$I0*(data.rev$R-t((x.centered)%*% b0)))/n/2
  }else{
    warning("Removing model variables that are linearly dependent with the stratification variables.")
    b1<-b1[-ind.na]
    b0<-b0[-ind.na]
    x.mat<-x.mat[,-ind.na]
    x.centered<-x.centered[,-ind.na]
    beta1.Ohat<-beta1.Ohat[-ind.na]
    beta0.Ohat<-beta0.Ohat[-ind.na]
    U_TK<-sum(data.rev$I1*(data.rev$R-t((x.centered)%*% b1 )) -
                data.rev$I0*(data.rev$R-t((x.centered)%*% b0)))/n/2
  }
  var_CL.null<-(sum(data.rev$delta*data.rev$Y0*data.rev$Y1/data.rev$Y^2)/n-
                  p_trt*(1-p_trt)*(beta1.Ohat+beta0.Ohat) %*% var(x.mat) %*% (beta1.Ohat+beta0.Ohat) )/n
  var_TK.null<-var_CL.null+p_trt*(1-p_trt)*(beta1.Ohat-b1/2+beta0.Ohat-b0/2)%*%var(x.mat)%*%(beta1.Ohat-b1/2+beta0.Ohat-b0/2)/n
  if(randomization%in% c("permuted_block","SR","CABC")){
    se<-sqrt(var_TK.null)
  }else{
    se<-NA
  }
  return(list(U_TK=U_TK,se=se))
}


# covariate adjusted logrank proposed in Ye, Yi, Shao (2022)
#'@title Ting Ye's original code
#'@export
covariate_adjusted_logrank<-function(data.simu,p_trt){
  n<-dim(data.simu)[1]
  data.simu$fz<-ind_to_factor(data.simu)
  data.rev<-data.sort(data.simu)
  T.seq<-data.rev$t
  T.rep<-c(0,T.seq)[1:n]
  T.diff<-T.seq-T.rep
  same.ind<-which(T.diff==0)
  n_col<-dim(data.rev)[2]
  for(ind in same.ind){
    data.rev[ind,(n_col-2):n_col]<-data.rev[ind-1,(n_col-2):n_col]
  }
  n<-dim(data.rev)[1]

  x.model<-as.matrix(data.rev[,grepl("model",names(data.rev)),drop=FALSE]) #only those has model
  x.mat<-model.matrix(~factor(fz)+x.model,data=data.rev)[,-1]
  x.centered<-sweep(x.mat, 2, colMeans(x.mat))
  # calculating beta0 and beta1 for variance estimation
  mu_t<-data.rev$Y1/data.rev$Y
  data.rev$O.hat<-data.rev$delta*(data.rev$I1-data.rev$Y1/data.rev$Y)-
    data.rev$I1*cumsum(data.rev$delta/data.rev$Y)+cumsum(data.rev$delta*data.rev$Y1/(data.rev$Y)^2)
  data.rev$O.hat[data.rev$I0==1]<- (-1)*data.rev$O.hat[data.rev$I0==1]
  fit1.Ohat<-lm(O.hat~factor(fz)+x.model[data.rev$I1==1,],data=data.rev[data.rev$I1==1,])
  fit0.Ohat<-lm(O.hat~factor(fz)+x.model[data.rev$I1==0,],data=data.rev[data.rev$I1==0,])
  beta1.Ohat<-fit1.Ohat$coefficients[-1]
  beta0.Ohat<-fit0.Ohat$coefficients[-1]

  ind.na<-which(is.na(beta1.Ohat))
  if(length(ind.na)==0){
    U_CL<-sum(data.rev$I1*(data.rev$O.hat-t((x.centered)%*% beta1.Ohat )) -
                data.rev$I0*(data.rev$O.hat-t((x.centered)%*% beta0.Ohat)))/n
  }else{
    warning("Removing model variables that are linearly dependent with the stratification variables.")
    x.mat<-x.mat[,-ind.na]
    x.centered<-x.centered[,-ind.na]
    beta1.Ohat<-beta1.Ohat[-ind.na]
    beta0.Ohat<-beta0.Ohat[-ind.na]
    U_CL<-sum(data.rev$I1*(data.rev$O.hat-t((x.centered)%*% beta1.Ohat )) -
                data.rev$I0*(data.rev$O.hat-t((x.centered)%*% beta0.Ohat)))/n
  }

  score_logrank_anhecova_var<-function(data.rev,p_trt,fit1.Ohat,fit0.Ohat,beta1.Ohat,beta0.Ohat){
    my.var<-(p_trt*var(fit1.Ohat$residuals)+(1-p_trt)*var(fit0.Ohat$residuals)+
               (p_trt*beta1.Ohat-(1-p_trt)*beta0.Ohat) %*% var(x.mat) %*% (p_trt*beta1.Ohat-(1-p_trt)*beta0.Ohat))/n
    my.var.null<-(sum(data.rev$delta*data.rev$Y0*data.rev$Y1/data.rev$Y^2)/n-
                    p_trt*(1-p_trt)*(beta1.Ohat+beta0.Ohat) %*% var(x.mat) %*% (beta1.Ohat+beta0.Ohat) )/n
    return(list(my.var=my.var,my.var.null=my.var.null))
  }
  tmp<-score_logrank_anhecova_var(data.rev,p_trt,fit1.Ohat,fit0.Ohat,beta1.Ohat,beta0.Ohat)
  se<-sqrt(tmp$my.var)
  se.null<-sqrt(tmp$my.var.null)
  return(list(U_CL=U_CL,se=se.null,se.orig=se))
}


# covariate adjusted stratified logrank proposed in Ye, Yi, Shao (2022)
covariate_adjusted_stratified_logrank<-function(data.simu,p_trt){
  n<-dim(data.simu)[1]
  data.simu$fz<-ind_to_factor(data.simu)
  fz.n<-unique(data.simu$fz)
  data.rev<-data.sort(data.simu[data.simu$fz==fz.n[1],])
  T.seq<-data.rev$t
  T.rep<-c(0,T.seq)[1:length(T.seq)]
  T.diff<-T.seq-T.rep
  same.ind<-which(T.diff==0)
  n_col<-dim(data.rev)[2]
  for(ind in same.ind){
    data.rev[ind,(n_col-2):n_col]<-data.rev[ind-1,(n_col-2):n_col]
  }
  mu_t<-data.rev$Y1/data.rev$Y
  data.rev$O.hat<-data.rev$delta*(data.rev$I1-data.rev$Y1/data.rev$Y)-
    data.rev$I1*cumsum(data.rev$delta/data.rev$Y)+cumsum(data.rev$delta*data.rev$Y1/(data.rev$Y)^2)
  data.rev$O.hat[data.rev$I0==1]<- (-1)*data.rev$O.hat[data.rev$I0==1]

  # calculate stratum-specific O_zij
  for(z in fz.n[-1]){
    data.tmp<-data.sort(data.simu[data.simu$fz==z,])
    T.seq<-data.tmp$t
    T.rep<-c(0,T.seq)[1:length(T.seq)]
    T.diff<-T.seq-T.rep
    same.ind<-which(T.diff==0)
    n_col<-dim(data.tmp)[2]
    for(ind in same.ind){
      data.tmp[ind,(n_col-2):n_col]<-data.tmp[ind-1,(n_col-2):n_col]
    }
    mu_t<-data.tmp$Y1/data.tmp$Y
    data.tmp$O.hat<-data.tmp$delta*(data.tmp$I1-data.tmp$Y1/data.tmp$Y)-
      data.tmp$I1*cumsum(data.tmp$delta/data.tmp$Y)+cumsum(data.tmp$delta*data.tmp$Y1/(data.tmp$Y)^2)
    data.tmp$O.hat[data.tmp$I0==1]<- (-1)*data.tmp$O.hat[data.tmp$I0==1]
    data.rev<-rbind(data.rev,data.tmp)
  }
  n<-dim(data.rev)[1]

  x.model<-as.matrix(data.rev[,grepl("model",names(data.rev)),drop=FALSE]) #only those has model

  fit.anhc1<-lm(O.hat~factor(fz)+x.model[data.rev$I1==1,],data=data.rev[data.rev$I1==1,])
  fit.anhc0<-lm(O.hat~factor(fz)+x.model[data.rev$I1==0,],data=data.rev[data.rev$I1==0,])
  which.model<-which(grepl("x.model",names(fit.anhc1$coefficients)))
  which.model.notNA<-which(!is.na(fit.anhc1$coefficients[which.model]))
  if(length(which.model.notNA)==0){
    stop("The model variables are linearly dependent on Z. The covariate-adjusted stratified log-rank is the same as the stratified log-rank.")
  }else{
    fit.anhc1.coefx<-fit.anhc1$coefficients[which.model[which.model.notNA]]
    fit.anhc0.coefx<-fit.anhc0$coefficients[which.model[which.model.notNA]]

    x.coefx<-model.matrix(~factor(fz)+as.matrix(x.model),data=data.rev)[,which.model[which.model.notNA],drop=FALSE]
    for(z in fz.n){
      x.coefx[data.rev$fz==z,]<-sweep(x.coefx[data.rev$fz==z,,drop=FALSE], 2, colMeans(x.coefx[data.rev$fz==z,,drop=FALSE]))
    }
    x.coefx.centered.byZ<-x.coefx
    U_CSL<- sum(data.rev$I1*data.rev$O.hat- data.rev$I0*data.rev$O.hat)-
      sum(data.rev$I1*as.vector(x.coefx.centered.byZ %*% fit.anhc1.coefx) - data.rev$I0*as.vector(x.coefx.centered.byZ %*% fit.anhc0.coefx))
    U_CSL<-U_CSL/n
  }
  ### calculate variance ###
  var_CSL<-0
  for(ind in 1:length(fz.n)){
    z<-fz.n[ind]
    Bz1<-var(data.rev$O.hat[data.rev$fz==z & data.rev$I1==1]-x.coefx[data.rev$fz==z & data.rev$I1==1,,drop=FALSE]%*%fit.anhc1.coefx)
    Bz0<-var(data.rev$O.hat[data.rev$fz==z & data.rev$I1==0]-x.coefx[data.rev$fz==z & data.rev$I1==0,,drop=FALSE]%*%fit.anhc0.coefx)
    if(is.na(Bz1) | is.na(Bz0)){
      warning("conditional variance equals 0")
      Bz1<-ifelse(is.na(Bz1),0,Bz1)
      Bz0<-ifelse(is.na(Bz0),0,Bz0)
    }
    SigmaXz<-var(x.coefx[data.rev$fz==z,,drop=FALSE])
    pz<-length(which(data.rev$fz==z))/n
    var_CSL<-var_CSL+pz*(p_trt*Bz1+(1-p_trt)*Bz0)+
      pz*(p_trt*fit.anhc1.coefx-(1-p_trt)*fit.anhc0.coefx) %*% SigmaXz %*% (p_trt*fit.anhc1.coefx-(1-p_trt)*fit.anhc0.coefx)
  }


  tmp_SL_var<-0
  tmp_CSL_var<-0
  for(z in fz.n){
    tmp_SL_var<-tmp_SL_var+wlogrank(data.rev[data.rev$fz==z,])$var.alt
    pz<-length(which(data.rev$fz==z))/n
    SigmaXz<-var(x.coefx[data.rev$fz==z,,drop=FALSE])
    tmp_CSL_var<-tmp_CSL_var+wlogrank(data.rev[data.rev$fz==z,])$var.alt/n -
      p_trt*(1-p_trt)*(fit.anhc1.coefx+fit.anhc0.coefx) %*% SigmaXz %*%(fit.anhc1.coefx+fit.anhc0.coefx)*pz
  }
  se<-sqrt(var_CSL/n)
  se.null<-sqrt(tmp_CSL_var/n)
  return(list(U_CSL=U_CSL,se=se.null,se.orig=se))
}

# Covariate adjusted logrank proposed in Ye, Yi, Shao (2022)
#' @title Ting Ye's original code
#' @export
covariate_adjusted_logrank <- function(data.simu, p_trt, Z=TRUE) {
  n <- dim(data.simu)[1]
  if(Z) data.simu$fz <- ind_to_factor(data.simu)
  data.rev <- data.sort(data.simu)
  T.seq <- data.rev$t
  T.rep <- c(0, T.seq)[1:n]
  T.diff <- T.seq - T.rep
  same.ind <- which(T.diff == 0)
  n_col <- dim(data.rev)[2]
  for (ind in same.ind) {
    data.rev[ind, (n_col - 2):n_col] <- data.rev[ind - 1, (n_col - 2):n_col]
  }
  n <- dim(data.rev)[1]

  x.model <-
    as.matrix(data.rev[, grepl("model", names(data.rev)), drop = FALSE]) #only those has model
  if(Z){
    x.mat <- model.matrix( ~ factor(fz) + x.model, data = data.rev)[, -1]
  } else {
    x.mat <- model.matrix( ~ x.model, data = data.rev)[, -1]
  }

  # Need this if there's only one covariate and no car_strata
  x.mat <- as.matrix(x.mat)
  x.centered <- sweep(x.mat, 2, colMeans(x.mat))
  # calculating beta0 and beta1 for variance estimation
  mu_t <- data.rev$Y1 / data.rev$Y
  data.rev$O.hat <-
    data.rev$delta * (data.rev$I1 - data.rev$Y1 / data.rev$Y) -
    data.rev$I1 * cumsum(data.rev$delta / data.rev$Y) + cumsum(data.rev$delta *
                                                                 data.rev$Y1 / (data.rev$Y) ^ 2)
  data.rev$O.hat[data.rev$I0 == 1] <-
    (-1) * data.rev$O.hat[data.rev$I0 == 1]
  if(Z){
    fit1.Ohat <-
      lm(O.hat ~ factor(fz) + x.model[data.rev$I1 == 1, ], data = data.rev[data.rev$I1 ==
                                                                             1, ])
    fit0.Ohat <-
      lm(O.hat ~ factor(fz) + x.model[data.rev$I1 == 0, ], data = data.rev[data.rev$I1 ==
                                                                             0, ])
  } else {
    fit1.Ohat <-
      lm(O.hat ~ x.model[data.rev$I1 == 1, ], data = data.rev[data.rev$I1 ==
                                                                1, ])
    fit0.Ohat <-
      lm(O.hat ~ x.model[data.rev$I1 == 0, ], data = data.rev[data.rev$I1 ==
                                                                0, ])
  }

  beta1.Ohat <- fit1.Ohat$coefficients[-1]
  beta0.Ohat <- fit0.Ohat$coefficients[-1]

  ind.na <- which(is.na(beta1.Ohat))
  if (length(ind.na) == 0) {
    U_CL <-
      sum(data.rev$I1 * (data.rev$O.hat - t((x.centered) %*% beta1.Ohat)) -
            data.rev$I0 * (data.rev$O.hat - t((x.centered) %*% beta0.Ohat))) /
      n
  } else{
    warning(
      "Removing model variables that are linearly dependent with the stratification variables."
    )
    x.mat <- x.mat[, -ind.na]
    x.centered <- x.centered[, -ind.na]
    beta1.Ohat <- beta1.Ohat[-ind.na]
    beta0.Ohat <- beta0.Ohat[-ind.na]
    U_CL <-
      sum(data.rev$I1 * (data.rev$O.hat - t((x.centered) %*% beta1.Ohat)) -
            data.rev$I0 * (data.rev$O.hat - t((x.centered) %*% beta0.Ohat))) /
      n
  }

  score_logrank_anhecova_var <-
    function(data.rev,
             p_trt,
             fit1.Ohat,
             fit0.Ohat,
             beta1.Ohat,
             beta0.Ohat) {
      my.var <-
        (
          p_trt * var(fit1.Ohat$residuals) + (1 - p_trt) * var(fit0.Ohat$residuals) +
            (p_trt * beta1.Ohat - (1 - p_trt) * beta0.Ohat) %*% var(x.mat) %*% (p_trt *
                                                                                  beta1.Ohat - (1 - p_trt) * beta0.Ohat)
        ) / n
      my.var.null <-
        (
          sum(data.rev$delta * data.rev$Y0 * data.rev$Y1 / data.rev$Y ^ 2) / n -
            p_trt * (1 - p_trt) * (beta1.Ohat + beta0.Ohat) %*% var(x.mat) %*% (beta1.Ohat +
                                                                                  beta0.Ohat)
        ) / n
      return(list(my.var = my.var, my.var.null = my.var.null))
    }
  tmp <-
    score_logrank_anhecova_var(data.rev, p_trt, fit1.Ohat, fit0.Ohat, beta1.Ohat, beta0.Ohat)
  se <- sqrt(tmp$my.var)
  se.null <- sqrt(tmp$my.var.null)
  return(list(
    U_CL = U_CL,
    se = se.null,
    se.orig = se
  ))
}

Try the RobinCar package in your browser

Any scripts or data that you put into this service are public.

RobinCar documentation built on May 29, 2024, 3:03 a.m.