R/estimating_equations_used.R

Defines functions AFTivIPCWScorePreKM AFTScorePre AFTivScorePre AFTivIPCWScorePre roughResidSDEstAFTIV roughResidSDEstAFT AFT2SLSScorePre AFT2SLSScoreSmoothPre AFT2SLSScoreAllPre AFT2SLSScoreSmoothAllPre AFTivIPCWScoreSmooth AFTivScoreSmoothPre genIPCWNumDenomMultivar genIPCWNumDenomMultivar2 AFTScoreSmoothPre evalAFTScore evalAFTivScore evalRnTAFTivScore evalAFT2SLSScorePrec genKMCensoringFuncVar genKMCensoringFunc genIPCWNumDenom evalAFTivIPCWScorePrec evalAFTivIPCWScorePrecSquare evalAFTivIPCWScorePrecNormalized

#' @export
AFTivIPCWScorePreKM <- function(beta, data.simu, GC, multiplier.wts = NULL)
{ 
  if (!inherits(data.simu, "survival.data")) stop("Need to use data generated by simIVMultivarSurvivalData")
  
  #the score function for the AFT model with instrumental variables and Inverse Probability Censoring Weighting
  survival <- data.simu$survival
  X        <- data.simu$X
  Z        <- data.simu$Z
  n        <- nrow(X)
  
  if (!is.null(multiplier.wts)) 
  {
    stopifnot(length(multiplier.wts) == n)
  }
  
  nvars <- ncol(X)
  
  #transform to log-time
  if (!is.null(survival$t)) 
  {
    if (is.null(survival$log.t)) 
    {
      tt <- log(survival$t)
    } else 
    {
      tt <- survival$log.t
    }
  }
  
  #creates G_c(T_i) term in the IPCW estimating equation
  GCT <- GC( survival$t )
  
  bX  <- X %*% beta
  
  #store the T_i - bX_i term (error)
  err <- survival$t - bX
  
  
  
  #sort according to error size ####observed failure time 
  #data.simu <- data.simu[order(data.simu$X),]  
  order.idx <- order(err)
  survival  <- survival[order.idx,]
  tt        <- tt[order.idx]
  bX        <- bX[order.idx]
  err       <- err[order.idx]
  GCT       <- GCT[order.idx]   #keep the G_c(T_i) terms aligned with the sorted rows
  X         <- as.matrix(X[order.idx,])
  Z         <- as.matrix(Z[order.idx,]) 
  
  
  #create indicator to set to zero terms where GCT == 0 and 
  #set to 1 so no dividing by zero occurs
  zero.indicator <- 1 * (GCT != 0)
  GCT[GCT == 0]  <- 1
  

  #generate empty vector to return eventually
  ret.vec <- numeric(nvars)
  
  
  at.risk.terms <- n:1
  
  if (!is.null(multiplier.wts)) 
  {
    Zm <- Z * multiplier.wts
    #first col is at.risk.terms, remaining are at.risk.Z.terms
    at.risk.mat <- genIPCWNumDenomMultivar2(bX, Zm, err, GC)
    
    for (i in 1:nvars) 
    {
      #return the score   
      #ret.vec[i] <- sum(zero.indicator * (survival$delta / GCT) * (at.risk.mat[,1] * Zm[,i] - at.risk.mat[,i + 1])) / sqrt(n)
      
      ret.vec[i] <- sum(zero.indicator * (survival$delta / GCT) * at.risk.terms * 
                          (1 * Zm[,i] - at.risk.mat[,i + 1] / at.risk.mat[,1])) / sqrt(n)
      
    }
  } else 
  {
    #first col is at.risk.terms, remaining are at.risk.Z.terms
    at.risk.mat <- genIPCWNumDenomMultivar2(bX, Z, err, GC)
    
    for (i in 1:nvars) 
    {
      #return the score   
      #ret.vec[i] <- sum(zero.indicator * (survival$delta / GCT) * (at.risk.mat[,1] * Z[,i] - at.risk.mat[,i + 1])) / sqrt(n)
      
      
      ret.vec[i] <- sum(zero.indicator * (survival$delta / GCT) * at.risk.terms * 
                          (1 * Z[,i] - at.risk.mat[,i + 1] / at.risk.mat[,1])) / sqrt(n)
      
      ## univar:
      
      ## at.risk.terms <- nrow(data.simu):1
      ##     return(sum(zero.indicator * (data.simu$delta / data.simu$GCT) * at.risk.terms *
      ## (data.simu$Z/nn - (1/nn)*data.simu$IPCW.at.risk.Z.terms / data.simu$IPCW.at.risk.terms)) / (sqrt(nn)))
    }
  }
  
  ret.vec
  
}
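
## A minimal usage sketch (hedged): `data.simu` is assumed to be the
## 'survival.data' list produced by simIVMultivarSurvivalData(), with components
## $survival (containing t and delta), $X, and $Z -- that layout is inferred from
## the accessors above, not documented here. Wrapped in if (FALSE) so sourcing
## this file does not execute it.
if (FALSE) {
  #Kaplan-Meier estimate of the censoring-survival function G_c
  GC <- genKMCensoringFunc(data.simu$survival)
  
  #evaluate the IPCW-IV score at a candidate coefficient vector
  beta0 <- rep(0, ncol(data.simu$X))
  AFTivIPCWScorePreKM(beta0, data.simu, GC)
  
  #a (near-)root of the score gives the estimate; one crude approach is to
  #minimize the squared norm of the score over beta
  obj <- function(b) sum(AFTivIPCWScorePreKM(b, data.simu, GC)^2)
  optim(beta0, obj)$par
}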

#' @export
AFTScorePre <- function(beta, survival, X, ZXmat, multiplier.wts = NULL)
{ 
  #the score function for the AFT model
  
  n <- nrow(X)
  if (!is.null(multiplier.wts)) 
  {
    stopifnot(length(multiplier.wts) == n)
  }
  nvars <- ncol(X)
  
  if (!is.null(survival$t)) 
  {
    #transform to log-time
    if (is.null(survival$log.t)) {survival$log.t <- log(survival$t)}
  }
  
  #store the T_i - bX_i term (error)
  survival$err <- survival$log.t - X %*% beta
  
  #sort according to error size ####observed failure time 
  #data.simu <- data.simu[order(data.simu$X),]  
  order.idx <- order(survival$err)
  survival  <- survival[order.idx,] 
  X         <- as.matrix(X[order.idx,]) 
  
  #the denominator of the at-risk comparison term  
  at.risk.terms <- (n:1) / n
  
  ret.vec <- numeric(nvars)
  if (!is.null(multiplier.wts)) 
  {
    for (i in 1:nvars) 
    {
      # multiply by weights  
      Xm <- X[,i] * multiplier.wts
      
      #the numerator of the at-risk comparison term
      at.risk.Z.terms <- cumsumRev(Xm) / n
      
      #return the score   
      ret.vec[i] <- sum(survival$delta * (at.risk.terms * Xm - at.risk.Z.terms)) / sqrt(n)
    }  
  } else 
  {
    for (i in 1:nvars) 
    {
      #the numerator of the at-risk comparison term  
      at.risk.Z.terms <- cumsumRev(X[,i]) / n
      
      #return the score   
      ret.vec[i] <- sum(survival$delta * (at.risk.terms * (X[,i]) - at.risk.Z.terms)) / sqrt(n)
    }  
  }
  ret.vec
}

attr(AFTScorePre, "name") <- "AFTScorePre"
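
## A small self-contained sketch (hedged) of the Gehan-type rank score computed
## above, written without the package helper cumsumRev() (assumed here to be a
## reverse cumulative sum): S(beta) = n^(-1/2) * sum_i delta_i * { Rbar(e_i) X_i
## - n^(-1) sum_{j: e_j >= e_i} X_j }, where e_i = log t_i - X_i'beta and
## Rbar(e_i) is the at-risk fraction. Illustration only, not the package API.
if (FALSE) {
  toy_aft_score <- function(beta, log.t, delta, X) {
    n     <- nrow(X)
    err   <- drop(log.t - X %*% beta)
    ord   <- order(err)
    X     <- X[ord, , drop = FALSE]
    delta <- delta[ord]
    at.risk   <- (n:1) / n                                          #fraction still at risk at e_(i)
    risk.sums <- apply(X, 2, function(x) rev(cumsum(rev(x))) / n)   #risk-set sums of each column of X
    colSums(delta * (at.risk * X - risk.sums)) / sqrt(n)
  }
}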


#' @export
AFTivScorePre <- function(beta, survival, X, ZXmat, multiplier.wts = NULL)
{ 
  #the score function for the AFT model
  
  n <- nrow(X)
  if (!is.null(multiplier.wts)) 
  {
    stopifnot(length(multiplier.wts) == n)
  }
  nvars <- ncol(X)
  
  if (!is.null(survival$t)) 
  {
    #transform to log-time
    if (is.null(survival$log.t)) {survival$log.t <- log(survival$t)}
  }
  
  #store the T_i - bX_i term (error)
  survival$err <- survival$log.t - X %*% beta
  
  #sort according to error size ####observed failure time 
  #data.simu <- data.simu[order(data.simu$X),]  
  order.idx <- order(survival$err)
  survival  <- survival[order.idx,] 
  X         <- as.matrix(X[order.idx,])
  ZXmat     <- as.matrix(ZXmat[order.idx,]) 
  
  #the denominator of the at-risk comparison term  
  at.risk.terms <- (n:1) / n
  
  ret.vec <- numeric(nvars)
  if (is.null(multiplier.wts)) 
  {
    for (i in 1:nvars)
    {
      #the numerator of the at-risk comparison term  
      at.risk.Z.terms <- cumsumRev(ZXmat[,i]) / n
      
      #return the score   
      ret.vec[i] <- sum(survival$delta * (at.risk.terms * ZXmat[,i] - at.risk.Z.terms)) / sqrt(n)
    }  
  } else 
  {
    for (i in 1:nvars) 
    {
      
      ZXm <- ZXmat[,i] * multiplier.wts
      
      #the numerator of the at-risk comparison term  
      at.risk.Z.terms <- cumsumRev(ZXm) / n
      
      #return the score   
      ret.vec[i] <- sum(survival$delta * (at.risk.terms * ZXm - at.risk.Z.terms)) / sqrt(n)
    }   
  }
  ret.vec
}
attr(AFTivScorePre, "name") <- "AFTivScorePre"
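
## A hedged sketch of one plausible construction of `ZXmat`: the covariate
## matrix X with its endogenous (instrumented) columns replaced by the
## instruments Z, so that exogenous covariates enter the score as themselves.
## The column-replacement convention is an assumption inferred from the
## `conf.x.loc` argument used elsewhere in this file, not a documented API.
if (FALSE) {
  build_ZXmat <- function(X, Z, conf.x.loc) {
    ZXmat <- X
    ZXmat[, conf.x.loc] <- Z     #swap the instruments in for the confounded columns
    ZXmat
  }
  ZXmat <- build_ZXmat(X, Z, conf.x.loc = 1)
  AFTivScorePre(beta, survival, X, ZXmat)
}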








#' @export
AFTivIPCWScorePre <- function(beta, survival, X, ZXmat, Z, GC, conf.x.loc, multiplier.wts = NULL)
{ 
  
  #the score function for the AFT model with instrumental variables and Inverse Probability Censoring Weighting
  n <- nrow(X)
  if (!is.null(multiplier.wts)) 
  {
    stopifnot(length(multiplier.wts) == n)
  }
  nvars <- ncol(X)
  
  if (!is.null(survival$t)) 
  {
    #transform to log-time
    if (is.null(survival$log.t)) {survival$log.t <- log(survival$t)}
  }
  
  #creates G_c(T_i) term in the IPCW estimating equation
  if (attr(GC, "cox"))
  {
    survival$GCT <- GC(exp(survival$log.t), X = data.matrix(cbind(X, Z)))
  } else 
  {
    survival$GCT <- GC(exp(survival$log.t))
  }
  
  #store the T_i - bX_i term (error)
  xbeta <- X %*% beta
  survival$err <- survival$log.t - xbeta
  
  survival$bX  <- xbeta
  
  #sort according to error size ####observed failure time 
  #data.simu <- data.simu[order(data.simu$X),]  
  order.idx    <- order(survival$err)
  survival     <- survival[order.idx,] 
  X            <- as.matrix(X[order.idx,])
  #Z <- as.matrix(Z[order.idx,])
  ZXmat        <- as.matrix(ZXmat[order.idx,])
  
  #create indicator to set to zero terms where GCT == 0 and 
  #set to 1 so no dividing by zero occurs
  zero.indicator <- 1 * (survival$GCT != 0)
  survival$GCT[which(survival$GCT == 0)] <- 1
  len.conf.x.loc <- length(conf.x.loc)
  at.risk.terms  <- n:1
  
  if (is.null(multiplier.wts)) 
  {
    #first col is at.risk.terms, remaining are at.risk.Z.terms
    at.risk.mat <- genIPCWNumDenomMultivar(survival, ZXmat, Z, X, GC, conf.x.loc) #/ n
    
    #generate empty vector to return eventually
    ret.vec <- numeric(nvars)
    
    ## here's what it looks like in the slow, loop way
    # for (i in 1:nvars) 
    # {
    #   #return the score   
    #   # ret.vec[i] <- sum(zero.indicator * (survival$delta / survival$GCT) * 
    #   #                     (at.risk.terms * ZXmat[,i]/n - at.risk.mat[,i + 1]/n)) / sqrt(n)
    #   ret.vec[i] <- sum(zero.indicator * (survival$delta / survival$GCT) * 
    #                       (ZXmat[,i]/n - (at.risk.mat[,i + 1]/n)/at.risk.terms  )) / sqrt(n)
    # }
    
    #### PREVIOUS
    #ret.vec <- mean(at.risk.terms) * colSums(zero.indicator * (survival$delta / survival$GCT) *
    #                    (ZXmat/n - (at.risk.mat[,-1,drop = FALSE]/n) / at.risk.terms)  ) / sqrt(n)
    
    eemat <- (survival$delta) * at.risk.mat[,1] * 
      (ZXmat/n - apply((at.risk.mat[,-1,drop = FALSE]/n), 2, function(xc) xc/at.risk.mat[,1]))
    
    # for (ii in 1:NCOL(eemat))
    # {
    #   eemat[,ii] <- zero.indicator * eemat[,ii] / survival$GCT
    # }
    
    for (ii in 1:length(conf.x.loc))
    {
      eemat[,conf.x.loc[ii]] <- zero.indicator * eemat[,conf.x.loc[ii]] / survival$GCT
    }
    
    ret.vec <- mean(at.risk.terms) * colSums( eemat ) / sqrt(n)

    
    
    # ret.vec <- colSums(zero.indicator * (survival$delta / survival$GCT) *
    #                      (at.risk.terms * ZXmat/n - (at.risk.mat[,-1,drop = FALSE]/n))  ) / sqrt(n)
  } else 
  {
    ZXmatm <- ZXmat * multiplier.wts
    #first col is at.risk.terms, remaining are at.risk.Z.terms
    at.risk.mat <- genIPCWNumDenomMultivar(survival, ZXmatm, Z, X, GC, conf.x.loc) #/ n
    
    #generate empty vector to return eventually
    ret.vec <- numeric(nvars)
    # for (i in 1:nvars) 
    # {
    #   #return the score   
    #   ret.vec[i] <- sum(zero.indicator * (survival$delta / survival$GCT) * 
    #                       (at.risk.terms * ZXmat[,i]/n - at.risk.mat[,i + 1]/n)) / sqrt(n) # at.risk.mat[,1]
    # }
    
    ## PREVIOUS
    # ret.vec <- mean(at.risk.terms) * colSums(zero.indicator * (survival$delta / survival$GCT) *
    #                      (ZXmat/n - ((multiplier.wts / at.risk.terms) * at.risk.mat[,-1,drop = FALSE] / n))  ) / sqrt(n)
    
    eemat <- (survival$delta) * at.risk.mat[,1] * 
      (ZXmatm/n - ((multiplier.wts) * apply(at.risk.mat[,-1,drop = FALSE] / n, 2, function(xc) xc/at.risk.mat[,1])))
    
    # for (ii in 1:NCOL(eemat))
    # {
    #   eemat[,ii] <- zero.indicator * eemat[,ii] / ifelse(survival$GCT==0, 1, survival$GCT)
    # }
    
    for (ii in 1:length(conf.x.loc))
    {
      eemat[,conf.x.loc[ii]] <- zero.indicator * eemat[,conf.x.loc[ii]] / ifelse(survival$GCT==0, 1, survival$GCT)
    }
    
    ret.vec <- mean(at.risk.terms) * colSums( eemat ) / sqrt(n)
    
    # ret.vec <- colSums(zero.indicator * (survival$delta / survival$GCT) *
    #                      (at.risk.terms * ZXmat/n - ((multiplier.wts / 1) * at.risk.mat[,-1,drop = FALSE] / n))  ) / sqrt(n)
  }
  
  ret.vec
  
}

attr(AFTivIPCWScorePre, "name") <- "AFTivIPCWScorePre"

###############################################
###     Smoothed Estimating Equations       ###
###############################################

## some utility functions for smoothed eeqns

roughResidSDEstAFTIV <- function(dat) 
{
  # returns rough estimate of sd of residuals
  # for smoothed rank estimator for AFT-IV
  lm.fit <- lm(X ~ Z, data = dat)
  dat$Xhat <- lm.fit$fitted.values
  lm.fit.2 <- lm(t ~ Xhat, data = dat)
  sd(resid(lm.fit.2))
}

roughResidSDEstAFT <- function(dat) 
{
  # returns rough estimate of sd of residuals
  # for smoothed rank estimator for AFT
  #if (all(dat$t >= 0)) {dat$t <- log(dat$t)}
  lm.fit <- lm(t ~ X, data = dat)
  sd(resid(lm.fit))
}
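
## A hedged illustration of how these rough residual SDs appear to be used: as
## the scale `stdev` of the normal-CDF smoother in AFTivIPCWScoreSmooth below,
## where the at-risk indicator 1{e_j >= e_i} is replaced by
## pnorm(n^0.26 * (e_j - e_i) / stdev). The pairing is inferred from the
## argument name and is not stated in this file.
if (FALSE) {
  stdev <- roughResidSDEstAFTIV(dat)        #dat assumed to have columns t, X, Z
  AFTivIPCWScoreSmooth(beta, data.simu, stdev = stdev)
}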

#' @export
AFT2SLSScorePre <- function(...) {
  "2SLS"
}
attr(AFT2SLSScorePre, "name") <- "AFT2SLSScorePre"

#' @export
AFT2SLSScoreSmoothPre <- function(...) {
  "2SLS"
}
attr(AFT2SLSScoreSmoothPre, "name") <- "AFT2SLSScoreSmoothPre"

#' @export
AFT2SLSScoreAllPre <- function(...) {
  "2SLS"
}
attr(AFT2SLSScoreAllPre, "name") <- "AFT2SLSScoreAllPre"

#' @export
AFT2SLSScoreSmoothAllPre <- function(...) {
  "2SLS"
}
attr(AFT2SLSScoreSmoothAllPre, "name") <- "AFT2SLSScoreSmoothAllPre"

#' @export
AFTivIPCWScoreSmooth <- function(beta, data.simu, stdev)
{ 
  if (!inherits(data.simu, "survival.data")) {stop("Need to use data generated by simIVMultivarSurvivalData")}
  
  #the score function for the AFT model
  
  survival <- data.simu$survival
  X <- data.simu$X
  Z <- data.simu$Z
  n <- nrow(X)
  nvars <- ncol(X)
  
  #generate function G_c() for ICPW
  GC <- genKMCensoringFunc(survival)
  
  #transform to log-time
  survival$t <- log(survival$t)
  
  
  #creates G_c(T_i) term in the IPCW estimating equation
  survival$GCT <- GC(exp(survival$t))
  
  #store the T_i - bX_i term (error)
  survival$err <- survival$t - X %*% beta
  
  survival$bX <- X %*% beta
  
  #sort according to error size ####observed failure time 
  #data.simu <- data.simu[order(data.simu$X),]  
  order.idx <- order(survival$err)
  survival <- survival[order.idx,] 
  X <- as.matrix(X[order.idx,])
  Z <- as.matrix(Z[order.idx,]) 
  
  
  #create indicator to set to zero terms where GCT == 0 and 
  #set to 1 so no dividing by zero occurs
  zero.indicator <- 1 * (survival$GCT != 0)
  survival$GCT[which(survival$GCT == 0)] <- 1
  
  
  #the numerator of the at-risk comparison term 
  all.at.risk.terms <- lapply(survival$err, function(x) {
    denom <- GC(exp(survival$bX + x))
    ind.2.drop <- 1 * (denom != 0)
    denom[which(denom == 0)] <- 1 #to prevent dividing by 0
    div.denom <- ind.2.drop * 1 / denom
    ret <- NULL
    ret[1] <- sum(pnorm(n^(0.26) * (survival$err - x) / stdev) * div.denom)
    for (i in 1:nvars) {
      ret[i + 1] <- sum(Z[,i] * pnorm(n^(0.26) * (survival$err - x) / stdev) * div.denom)
    }
    ret
  })
  
  #first col is at.risk.terms, remaining are at.risk.Z.terms
  at.risk.mat <- do.call(rbind, all.at.risk.terms)
  
  #generate empty vector to return eventually
  ret.vec <- numeric(nvars)
  for (i in 1:nvars) {
    #return the score   
    ret.vec[i] <- sum(zero.indicator * (survival$delta / survival$GCT) * (at.risk.mat[,1] * Z[,i] - at.risk.mat[,i + 1])) / sqrt(n)
  }
  
  ret.vec
}

#' @export
AFTivScoreSmoothPre <- function(beta, survival, X, ZXmat, tau = 1e-3)
{ 
  
  #the score function for the AFT model
  
  n     <- nrow(X)
  nvars <- ncol(X)
  
  #transform to log-time
  if (is.null(survival$log.t)) 
  {
    log.t <- log(survival$t)
  } else 
  {
    log.t <- survival$log.t
  }
  
  delta <- survival$delta
  
  if (n < 10000) 
  {
    #store the T_i - bX_i term (error)
    err <- as.vector(log.t - X %*% beta)
    
    #the denominator of the at-risk comparison term  
    out.sig <- sigmoid(outer(err, err, "-"), tau)
    at.risk.terms <- colSums(out.sig)
  } else 
  {
    #store the T_i - bX_i term (error)
    err <- log.t - X %*% beta
    
    #the denominator of the at-risk comparison term  
    at.risk.terms <- unlist(lapply(err, function(x) sum(sigmoid((err - x), tau) )))
  }
  #generate empty vector to return eventually
  #   ret.vec <- numeric(nvars)
  #   for (i in 1:nvars) {
  #     #the numerator of the at-risk comparison term 
  #     at.risk.X.terms <- unlist(lapply(survival$err, function(x) 
  #       sum(ZXmat[,i] * sigmoid((survival$err - x), tau))))
  #     
  #     #return the score   
  #     ret.vec[i] <- sum(survival$delta * (survival$at.risk.terms * ZXmat[,i] - at.risk.X.terms)) / sqrt(n)
  #   }
  
  
  if (n < 10000) 
  {
    ret.vec <- apply(ZXmat, 2, function(ZX) 
    {
      at.risk.X.terms <- colSums(out.sig * ZX)
      sum(delta * (at.risk.terms * ZX - at.risk.X.terms)) / sqrt(n)
    })
  } else 
  {
    ret.vec <- apply(ZXmat, 2, function(ZX) 
    {
      at.risk.X.terms <- unlist(lapply(err, function(x) sum(ZX * sigmoid((err - x), tau))))
      sum(delta * (at.risk.terms * ZX - at.risk.X.terms)) / sqrt(n)
    })
  }
  
  ret.vec
}

attr(AFTivScoreSmoothPre, "name") <- "AFTivScoreSmoothPre"
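
## The helper sigmoid() used above is defined elsewhere in the package; the
## smoothed score only requires that sigmoid(x, tau) -> 1{x >= 0} as tau -> 0.
## A hedged, self-contained stand-in with that property (the exact package
## definition may differ):
if (FALSE) {
  sigmoid_sketch <- function(x, tau = 1e-3) 1 / (1 + exp(-x / tau))
  #sigmoid_sketch(e.j - e.i, tau) is near 1 when e.j > e.i and near 0 when
  #e.j < e.i, so colSums(sigmoid_sketch(outer(err, err, "-"), tau)) gives the
  #smoothed at-risk counts used in AFTivScoreSmoothPre above.
}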


genIPCWNumDenomMultivar <- function(dat, Zx, Z, X, GC.func, conf.x.loc)
{
  #dat is a data.frame
  #GC.func is a function
  num.vars <- ncol(Zx)
  nrd <- nrow(dat)
  num <- denom <- array(0, dim = c(nrow(dat),1))
  idx <- 1:num.vars
  idx <- idx[-match(conf.x.loc, idx)]
  
  at.risk.list <- lapply(1:nrd, function(i) 
  {
    err.i    <- dat$err[i]
    ind.zero <- FALSE
    
    
    
    if (attr(GC.func, "cox"))
    {
      ipcw <- GC.func(exp(dat$bX[i:nrow(dat)] + err.i), X = data.matrix(cbind(X, Z))[i:nrow(dat),])
    } else 
    {
      ipcw <- GC.func(exp(dat$bX[i:nrow(dat)] + err.i))
    }
      
    if (all(ipcw == 0))
    {
      ipcw <- rep(0.00001, length(ipcw))
      ind.zero <- TRUE
    }
    ipcw[which(ipcw == 0)] <- min(ipcw[which(ipcw != 0)]) / 5
    ret.vec <- array(0, dim = (num.vars + 1))
    if (!ind.zero)
    {
      #first element is the IPCW at-risk denominator, remaining are the Zx numerators
      ret.vec <- c(sum(1 / ipcw), colSums(Zx[i:nrd,,drop = FALSE] / ipcw))
      return (ret.vec)
    } else 
    {
      return (ret.vec)
    }
  })
  ret.mat <- do.call(rbind, at.risk.list)
  for (j in idx)
  {
    ret.mat[,j+1] <- cumsumRev(Zx[,j])
  }
  ret.mat
}

genIPCWNumDenomMultivar2 <- function(bX, Z, err, GC.func)
{
  #dat is a data.frame
  #GC.func is a function
  num.vars <- ncol(Z)
  Z <- as.matrix(Z)
  n.obs <- nrow(bX)
  
  at.risk.list <- lapply(1:n.obs, function(i) 
  {
    err.i    <- err[i]
    ind.zero <- FALSE
    ipcw     <- GC.func(exp(bX[i:n.obs] + err.i))
    
    if (all(ipcw == 0))
    {
      len.i <- length(ipcw)
      #print(len.i)
      ipcw <- rep(0.001, len.i)
      ind.zero <- TRUE
    }
    #ipcw[which(ipcw == 0)] <- min(ipcw[which(ipcw != 0)]) / 2
    ret.vec <- array(0, dim = num.vars + 1)
    if (!ind.zero)
    {
      ret.vec[1] <- sum(1 / ipcw)
      ret.vec[2:(num.vars+1)] <- if (i != n.obs)
      { 
        colSums(Z[i:n.obs,,drop = FALSE] / ipcw)
      } else 
      {
        Z[n.obs,] / ipcw
      }
        
      return (ret.vec)
    } else 
    {
      return (ret.vec)
    }
  })
  do.call(rbind, at.risk.list)
}

#' @export
AFTScoreSmoothPre <- function(beta, survival, X, ZXmat, tau = 1e-3)
{ 
  
  #the score function for the AFT model
  
  n <- nrow(X)
  nvars <- ncol(X)
  
  #transform to log-time
  if (is.null(survival$log.t)) 
  { 
    log.t <- log(survival$t)
  } else 
  {
    log.t <- survival$log.t
  }
  
  delta <- survival$delta
  
  if (n < 10000) 
  {
    #store the T_i - bX_i term (error)
    err <- as.vector(log.t - X %*% beta)
    
    #the denominator of the at-risk comparison term  
    out.sig <- sigmoid(outer(err, err, "-"), tau)
    at.risk.terms <- colSums(out.sig) / n
  } else 
  {
    #store the T_i - bX_i term (error)
    err <- log.t - X %*% beta
    
    #the denominator of the at-risk comparison term  
    at.risk.terms <- unlist(lapply(err, function(x) sum(sigmoid((err - x), tau) ))) / n
  }
  # #generate empty vector to return eventually
  #   ret.vec <- numeric(nvars)
  #   for (i in 1:nvars) {
  #     #the numerator of the at-risk comparison term 
  #     at.risk.X.terms <- unlist(lapply(err, function(x) sum(X[,i] * sigmoid((err - x), tau))))
  #     
  #     #return the score   
  #     ret.vec[i] <- sum(delta * (at.risk.terms * X[,i] - at.risk.X.terms)) / sqrt(n)
  #   }
  if (n < 10000) 
  {
    ret.vec <- apply(X, 2, function(xi) 
    {
      at.risk.X.terms <- colSums(out.sig * xi) / n
      sum(delta * (at.risk.terms * xi - at.risk.X.terms)) / sqrt(n)
    })
  } else 
  {
    ret.vec <- apply(X, 2, function(xi) 
    {
      at.risk.X.terms <- unlist(lapply(err, function(x) sum(xi * sigmoid((err - x), tau)))) / n
      sum(delta * (at.risk.terms * xi - at.risk.X.terms)) / sqrt(n)
    })
  }
  
  ret.vec
}

attr(AFTScoreSmoothPre, "name") <- "AFTScoreSmoothPre"






######################################################################################################
######################################################################################################
#
#                                 UNIVARIATE ESTIMATION EQUATIONS
#
######################################################################################################
######################################################################################################

#' @export
evalAFTScore <- function(data.simu, beta, multiplier.wts = NULL)
{ 
  #the score function for the AFT model
  
  #transform to log-time
  data.simu$t <- log(data.simu$t)
  
  #store the T_i - bX_i term (error)
  data.simu <- data.frame(data.simu, err = data.simu$t - beta * data.simu$X)
  
  #sort according to error size ####observed failure time 
  #data.simu <- data.simu[order(data.simu$X),] 
  ord.idx <- order(data.simu$err)
  data.simu <- data.simu[ord.idx,] 
  
  #the denominator of the at-risk comparison term  
  data.simu <- data.frame(data.simu, at.risk.terms = nrow(data.simu):1)
  
  if (!is.null(multiplier.wts)) 
  {
    multiplier.wts <- multiplier.wts[ord.idx]
    #the numerator of the at-risk comparison term  
    data.simu <- data.frame(data.simu, at.risk.X.terms = cumsumRev(data.simu$X * multiplier.wts))
    
    #return the score   
    return(sum(data.simu$delta * (data.simu$at.risk.terms * data.simu$X * multiplier.wts - data.simu$at.risk.X.terms)) / sqrt(nrow(data.simu)))
  } else 
  {
    data.simu <- data.frame(data.simu, at.risk.X.terms = cumsumRev(data.simu$X))
    
    #return the score   
    return(sum(data.simu$delta * (data.simu$at.risk.terms * data.simu$X - data.simu$at.risk.X.terms)) / sqrt(nrow(data.simu)))
  }
  
}
vEvalAFTScore <- Vectorize(evalAFTScore, vectorize.args = "beta")
attr(vEvalAFTScore, "name") <- "evalAFTScore"
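
## A hedged usage sketch for the univariate score: the AFT estimate is a
## (near-)root of the score in beta, so with a data.frame `data.simu` holding
## columns t, X, and delta one might bracket a sign change and solve with
## uniroot(). The column names are assumptions based on the accessors above.
if (FALSE) {
  fit <- uniroot(function(b) evalAFTScore(data.simu, b),
                 interval = c(-5, 5), extendInt = "yes")
  fit$root
}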

#' @export
evalAFTivScore <- function(data.simu, beta, multiplier.wts = NULL)
{ 
  #the score function for the AFT model with instrumental variables
  
  #transform to log-time
  data.simu$t <- log(data.simu$t)
  
  #store the T_i - bX_i term (error)
  data.simu$err <- data.simu$t - beta * data.simu$X
  
  #sort according to error size ####observed failure time 
  ord.idx <- order(data.simu$err)
  data.simu <- data.simu[ord.idx,] 
  
  #the denominator of the at-risk comparison term  
  data.simu$at.risk.terms <- nrow(data.simu):1
  
  if (!is.null(multiplier.wts)) 
  {
    multiplier.wts <- multiplier.wts[ord.idx]
    #the numerator of the at-risk comparison term  
    data.simu$at.risk.Z.terms <- cumsumRev(data.simu$Z * multiplier.wts)
    
    #return the score   
    return(sum(data.simu$delta * (data.simu$at.risk.terms * data.simu$Z * multiplier.wts - data.simu$at.risk.Z.terms)) / sqrt(nrow(data.simu)))
  } else 
  {
    #the numerator of the at-risk comparison term  
    data.simu$at.risk.Z.terms <- cumsumRev(data.simu$Z)
    
    #return the score   
    return(sum(data.simu$delta * (data.simu$at.risk.terms * data.simu$Z - data.simu$at.risk.Z.terms)) / sqrt(nrow(data.simu)))
  }
  
}
vEvalAFTivScore <- Vectorize(evalAFTivScore, vectorize.args = "beta")
attr(vEvalAFTivScore, "name") <- "evalAFTivScore"



### R & T 1991

#' @export
evalRnTAFTivScore <- function(data.simu, beta, multiplier.wts = NULL)
{ 
  #the score function for the AFT model with instrumental variables
  
  #transform to log-time
  #data.simu$t <- log(data.simu$t)
  
  n.obs <- nrow(data.simu)
  
  #store the T_i - bX_i term (error)
  xbeta <- drop(beta * data.simu$X)
  data.simu$UistarBeta <- drop(data.simu$t * exp(xbeta))
  
  data.simu$CiBeta <- ifelse(beta < 0, data.simu$Cen.time * exp(beta), data.simu$Cen.time)
  
  data.simu$XiBeta <- pmin(data.simu$UistarBeta, data.simu$CiBeta)
  
  #sort according to error size Xi(Beta)
  ord.idx   <- order(data.simu$XiBeta)
  data.simu <- data.simu[ord.idx,] 
  
  
  
  # print(paste(mean(delta), " ", mean(drop(1 * (data.simu$UistarBeta < data.simu$CiBeta)))))
  
  # delta <- drop(1 * (data.simu$UistarBeta < data.simu$CiBeta))
  
  delta <- ifelse(data.simu$CiBeta < data.simu$UistarBeta, 0, data.simu$delta)
  
  
  #the denominator of the at-risk comparison term  
  at.risk.terms <- n.obs:1
  
  if (!is.null(multiplier.wts)) 
  {
    multiplier.wts <- multiplier.wts[ord.idx]
    #the numerator of the at-risk comparison term  
    at.risk.Z.terms <- cumsumRev(data.simu$Z * multiplier.wts)
    
    #return the score   
    return(  sum(delta * (data.simu$Z * multiplier.wts - at.risk.Z.terms / at.risk.terms)) / sqrt(n.obs)  )
  } else 
  {
    #the numerator of the at-risk comparison term  
    at.risk.Z.terms <- cumsumRev(data.simu$Z)
    
    #return the score   
    return(  sum(delta * 1 * (data.simu$Z - at.risk.Z.terms / at.risk.terms)) / sqrt(n.obs)  )
  }
  
}
vEvalRnTAFTivScore <- Vectorize(evalRnTAFTivScore, vectorize.args = "beta")
attr(vEvalRnTAFTivScore, "name") <- "evalRnTAFTivScore"



###




#' @export
evalAFT2SLSScorePrec <- function(data.simu, beta, multiplier.wts = NULL)
{ 
  #the score function for the AFT model with instrumental variables
  
  #transform to log-time
  data.simu$t <- log(data.simu$t)
    
  #store the T_i - bX_i term (error)
  data.simu$err <- data.simu$t - beta * data.simu$X.hat
  
  #sort according to error size ####observed failure time 
  #data.simu <- data.simu[order(data.simu$X),]   
  ord.idx   <- order(data.simu$err)
  data.simu <- data.simu[ord.idx,] 
  
  #the denominator of the at-risk comparison term  
  data.simu$at.risk.terms <- nrow(data.simu):1
  if (is.null(multiplier.wts)) 
  {
    
    #the numerator of the at-risk comparison term  
    data.simu$at.risk.Z.terms <- cumsumRev(data.simu$X.hat)
    
    #return the score   
    return(sum(data.simu$delta * (data.simu$at.risk.terms * data.simu$X.hat - data.simu$at.risk.Z.terms)) / sqrt(nrow(data.simu)))
    
  } else 
  {
    multiplier.wts <- multiplier.wts[ord.idx]
    
    #the numerator of the at-risk comparison term  
    data.simu$at.risk.Z.terms <- cumsumRev(data.simu$X.hat * multiplier.wts)
    
    #return the score   
    return(sum(data.simu$delta * (data.simu$at.risk.terms * data.simu$X.hat * multiplier.wts - data.simu$at.risk.Z.terms)) / sqrt(nrow(data.simu)))
    
  }
  
}
vEvalAFT2SLSScorePrec <- Vectorize(evalAFT2SLSScorePrec, vectorize.args = "beta")
attr(vEvalAFT2SLSScorePrec, "name") <- "evalAFT2SLSScorePrec"
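
## A hedged sketch of the first stage implied by the use of X.hat above: the
## endogenous covariate is regressed on the instrument and the fitted values
## are stored as data.simu$X.hat before the rank score is evaluated. This
## construction is inferred from the column name, not documented here.
if (FALSE) {
  data.simu$X.hat <- fitted(lm(X ~ Z, data = data.simu))
  vEvalAFT2SLSScorePrec(data.simu, beta = seq(-1, 1, by = 0.1))
}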


genKMCensoringFuncVar <- function(data)
{
  #returns the G_c function for inverse probability censored weighting
  #by obtaining the Kaplan-Meier estimate for censoring and returning
  #a step function of the estimate
  data$delta.G <- 1 - data$delta
  if (is.null(data$t.original)) 
  {
    if (is.null(data$t)) 
    {
      cens.fit <- survfit(Surv(exp(log.t), delta.G) ~ 1, data = data)
    } else 
    {
      cens.fit <- survfit(Surv(t, delta.G) ~ 1, data = data)
    }
  } else 
  {
    cens.fit <- survfit(Surv(t.original, delta.G) ~ 1, data = data)
  }
  cens.dist  <- data.frame(c(0, summary(cens.fit)$time), c(1, summary(cens.fit)$surv))
  n.risk     <- data.frame(c(0, summary(cens.fit)$time), c(nrow(data), summary(cens.fit)$n.risk))
  cens.var   <- data.frame(c(0, summary(cens.fit)$time), c(summary(cens.fit)$std.err[1], summary(cens.fit)$std.err))
  tmpFunc    <- stepfun(cens.dist[,1], c(1,cens.dist[,2]), f = 0)
  tmpFuncVar <- stepfun(cens.var[,1], c(1,cens.var[,2]), f = 0)
  nriskFunc  <- stepfun(n.risk[,1], c(n.risk[1,2],n.risk[,2]), f = 0)
  list(dist = tmpFunc, var = tmpFuncVar, n.risk = nriskFunc)
}

genKMCensoringFunc <- function(data, cox = FALSE, X = NULL)
{
  #returns the G_c function for inverse probability censored weighting
  #by obtaining the Kaplan-Meier estimate for censoring and returning
  #a step function of the estimate
  data$delta.G <- 1 - data$delta
  if (!cox)
  {
    if (is.null(data$t.original)) 
    {
      if (is.null(data$t)) 
      {
        cens.fit  <- survfit(Surv(exp(log.t), delta.G) ~ 1, data = data)
        cens.dist <- data.frame(c(0, summary(cens.fit)$time), c(1, summary(cens.fit)$surv))
      } else 
      {
        cens.fit  <- survfit(Surv(t, delta.G) ~ 1, data = data)
        cens.dist <- data.frame(c(min(data$t), summary(cens.fit)$time), c(1, summary(cens.fit)$surv))
      }
    } else 
    {
      cens.fit  <- survfit(Surv(t.original, delta.G) ~ 1, data = data)
      cens.dist <- data.frame(c(min(data$t.original), summary(cens.fit)$time), c(1, summary(cens.fit)$surv))
    }
    
    tmpFunc <- stepfun(cens.dist[,1], c(1,cens.dist[,2]), f=0)
    attr(tmpFunc, "cox") <- FALSE
  } else 
  {
    if (is.null(X)) stop("Must provide covariates X to be modeled if cox = TRUE")
    
    if (is.null(data$t.original)) 
    {
      if (is.null(data$t)) 
      {
        cph <- coxph(Surv(exp(data$log.t), data$delta.G) ~ X)
        bh <- basehaz(cph)
        baseline.survival <- exp(-bh$hazard)
        cens.dist.cph <- data.frame(c(0, bh$time), 
                                    c(1, baseline.survival))
      } else 
      {
        cph <- coxph(Surv(data$t, data$delta.G) ~ X)
        bh <- basehaz(cph)
        baseline.survival <- exp(-bh$hazard)
        cens.dist.cph <- data.frame(c(min(data$t), bh$time), 
                                    c(1, baseline.survival))
      }
    } else 
    {
      cph <- coxph(Surv(data$t.original, data$delta.G) ~ X)
      bh <- basehaz(cph)
      baseline.survival <- exp(-bh$hazard)
      cens.dist.cph <- data.frame(c(min(data$t.original), bh$time), 
                                  c(1, baseline.survival))
    }
    
    
    # S_0(t) = exp(-Lambda(t))
    # S(t | X) = S_0(t) ^ exp(x'beta)
    
    tmpFunc.cph <- stepfun(cens.dist.cph[,1], c(1,cens.dist.cph[,2]), f = 0)
    
    coef.cph <- coef(cph)
    names(coef.cph) <- NULL
    
    ## return function to compute estimated not-censoring
    ## probability conditional on x (covariates)
    tmpFunc <- function(t, X)
    {
      if (is.matrix(X))
      {
        return( tmpFunc.cph(t) ^ exp(drop(X %*% coef.cph)) )
      } else 
      {
        return( tmpFunc.cph(t) ^ exp(   sum(X * coef.cph)) )
      }
    }
    
    attr(tmpFunc, "cox") <- TRUE
  }
  
  tmpFunc
}
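
## A hedged usage sketch: with censoring indicated by delta == 0, the returned
## function evaluates the estimated censoring-survival probability G_c(t); with
## cox = TRUE it is covariate-conditional, G_c(t | X). The data layout (columns
## t and delta) follows the accessors used inside the function.
if (FALSE) {
  GC.km <- genKMCensoringFunc(surv.df)                          #Kaplan-Meier based
  GC.km(c(1, 2, 5))                                             #G_c at selected times
  
  GC.cox <- genKMCensoringFunc(surv.df, cox = TRUE, X = Xmat)   #Cox-model based
  GC.cox(c(1, 2, 5), X = Xmat[1:3, , drop = FALSE])             #conditional on covariates
}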


genIPCWNumDenom <- function(dat, GC.func, multiplier.wts = NULL)
{
  #dat is a data.frame
  #GC.func is a function
  num <- denom <- array(0, dim = c(nrow(dat),1))
  bX <- dat$bX
  if (is.null(multiplier.wts)) 
  {
    Z <- dat$Z
  } else 
  {
    #bX <- dat$bX * multiplier.wts
    Z <- dat$Z * multiplier.wts
  }
  
  err <- dat$err
  len <- nrow(dat)
  for (i in 1:len)
  {
    err.i <- err[i]
    #num.tmp <- denom.tmp <- 0
    #for (j in i:nrow(dat)){
    #  ipcw <- GC.func(dat$bX[j] + err.i)
    #  num.tmp <- num.tmp + dat$Z[j] / ipcw
    #  denom.tmp <- denom.tmp + 1 / ipcw
    #}
    ind.zero <- FALSE
    if (attr(GC.func, "cox"))
    {
      ipcw <- GC.func(exp(bX[i:len] + err.i), X = data.matrix(cbind(dat$X[i:len], dat$Z[i:len])))
    } else 
    {
      ipcw <- GC.func(exp(bX[i:len] + err.i))
    }
    if (all(ipcw == 0))
    {
      ipcw     <- rep(0.0001, length(ipcw))
      ind.zero <- TRUE
    }
    #ind.to.zero <- 1 * (ipcw != 0) 
    ind.to.zero <- 1
    ipcw[which(ipcw == 0)] <- min(ipcw[which(ipcw != 0)]) / 5
    if (!ind.zero)
    {
      num[i]   <- sum((Z[i:len] / ipcw) * ind.to.zero)
      denom[i] <- sum((1 / ipcw) * ind.to.zero)
    } else 
    {
      num[i] <- denom[i] <- 1e5
    }
  }
  dat$IPCW.at.risk.Z.terms <- num
  dat$IPCW.at.risk.terms <- denom
  dat
}

#' @export
evalAFTivIPCWScorePrec <- function(data.simu, beta, GC, multiplier.wts = NULL)
{ 
  #the score function for the AFT model with instrumental variables and Inverse Probability Censoring Weighting
    
  data.simu$t.original <- data.simu$t
  
  #transform to log-time
  data.simu$t <- log(data.simu$t)
  
  #store the T_i - bX_i term (error)
  data.simu$err <- data.simu$t - beta * data.simu$X
  
  #store beta * X
  data.simu$bX <- beta * data.simu$X
  
  #creates G_c(T_i) term in the IPCW estimating equation
  if (attr(GC, "cox"))
  {
    data.simu$GCT <- GC(exp(data.simu$t), X = data.matrix(cbind(data.simu$X, data.simu$Z)))
  } else 
  {
    data.simu$GCT <- GC(exp(data.simu$t))
  }
  
  at.risk.terms <- nrow(data.simu):1
  
  #sort according to error size ####observed failure time 
  #data.simu <- data.simu[order(data.simu$X),]   
  ord.idx   <- order(data.simu$err)
  data.simu <- data.simu[ord.idx,] 
  
  #create indicator to set to zero terms where GCT == 0 and 
  #set to 1 so no dividing by zero occurs
  zero.indicator <- 1 * (data.simu$GCT != 0)
  data.simu$GCT[which(data.simu$GCT == 0)] <- 1
  
  if (!is.null(multiplier.wts)) 
  {
    multiplier.wts <- multiplier.wts[ord.idx]
  }
  
  #the denominator of the at-risk comparison term  
  data.simu <- genIPCWNumDenom(data.simu, GC, multiplier.wts)
  
  nn <- nrow(data.simu)
  
  if (is.null(multiplier.wts)) 
  {
    #return the score   ##mean(data.simu$GCT) * 
    return(sum(zero.indicator * (data.simu$delta / data.simu$GCT) * at.risk.terms *
                 (data.simu$Z/nn - (1/nn)*data.simu$IPCW.at.risk.Z.terms / data.simu$IPCW.at.risk.terms)) / (sqrt(nn)))
  } else 
  {
    #return the score   ##mean(data.simu$GCT) * 
    return(sum(zero.indicator * (data.simu$delta / data.simu$GCT) * 
                 (data.simu$Z * multiplier.wts/nn - (1/nn)*data.simu$IPCW.at.risk.Z.terms / data.simu$IPCW.at.risk.terms)) / (sqrt(nn)))
  }
}
vEvalAFTivIPCWScorePrec <- Vectorize(evalAFTivIPCWScorePrec, vectorize.args = "beta")
attr(vEvalAFTivIPCWScorePrec, "name") <- "evalAFTivIPCWScorePrec"


evalAFTivIPCWScorePrecSquare <- function(data.simu, beta, GC, multiplier.wts = NULL)
{ 
  #the score function for the AFT model with instrumental variables and Inverse Probability Censoring Weighting
  
  data.simu$t.original <- data.simu$t
  
  #transform to log-time
  data.simu$t <- log(data.simu$t)
  
  #store the T_i - bX_i term (error)
  data.simu$err <- data.simu$t - beta * data.simu$X
  
  #store beta * X
  data.simu$bX <- beta * data.simu$X
  
  #creates G_c(T_i) term in the IPCW estimating equation
  data.simu$GCT <- GC(exp(data.simu$t))
  
  #sort according to error size ####observed failure time 
  #data.simu <- data.simu[order(data.simu$X),]   
  ord.idx <- order(data.simu$err)
  data.simu <- data.simu[ord.idx,] 
  
  #create indicator to set to zero terms where GCT == 0 and 
  #set to 1 so no dividing by zero occurs
  zero.indicator <- 1 * (data.simu$GCT != 0)
  data.simu$GCT[which(data.simu$GCT == 0)] <- 1
  
  if (!is.null(multiplier.wts)) 
  {
    multiplier.wts <- multiplier.wts[ord.idx]
  }
  
  #the denominator of the at-risk comparison term  
  data.simu <- genIPCWNumDenom(data.simu, GC, multiplier.wts)
  nn        <- nrow(data.simu)
  
  if (is.null(multiplier.wts)) 
  {
    #return the score   
    return(sum(  (zero.indicator * (data.simu$delta / data.simu$GCT)^2 * 
                 (data.simu$IPCW.at.risk.terms * data.simu$Z/nn - (1/nn)*data.simu$IPCW.at.risk.Z.terms)^2))/nn )
  } else 
  {
    #return the score   
    return(sum(  (zero.indicator * (data.simu$delta / data.simu$GCT)^2 * 
                 (data.simu$IPCW.at.risk.terms * data.simu$Z * multiplier.wts/nn - (1/nn)*data.simu$IPCW.at.risk.Z.terms)^2))/nn )
  }
}
vEvalAFTivIPCWScorePrecSquare <- Vectorize(evalAFTivIPCWScorePrecSquare, vectorize.args = "beta")
attr(vEvalAFTivIPCWScorePrecSquare, "name") <- "evalAFTivIPCWScorePrecSquare"

evalAFTivIPCWScorePrecNormalized <- function(data.simu, beta, GC, multiplier.wts = NULL)
{ 
  #the score function for the AFT model with instrumental variables and Inverse Probability Censoring Weighting
  
  data.simu$t.original <- data.simu$t
  
  #transform to log-time
  data.simu$t <- log(data.simu$t)
  
  #store the T_i - bX_i term (error)
  data.simu$err <- data.simu$t - beta * data.simu$X
  
  #store beta * X
  data.simu$bX <- beta * data.simu$X
  
  #creates G_c(T_i) term in the IPCW estimating equation
  data.simu$GCT <- GC(exp(data.simu$t))
  
  #sort according to error size ####observed failure time 
  #data.simu <- data.simu[order(data.simu$X),]   
  ord.idx <- order(data.simu$err)
  data.simu <- data.simu[ord.idx,] 
  
  #create indicator to set to zero terms where GCT == 0 and 
  #set to 1 so no dividing by zero occurs
  zero.indicator <- 1 * (data.simu$GCT != 0)
  data.simu$GCT[which(data.simu$GCT == 0)] <- 1
  
  if (!is.null(multiplier.wts)) 
  {
    multiplier.wts <- multiplier.wts[ord.idx]
  }
  
  #the denominator of the at-risk comparison term  
  data.simu <- genIPCWNumDenom(data.simu, GC, multiplier.wts)
  nn <- nrow(data.simu)
  
  if (is.null(multiplier.wts)) 
  {
    #return the score   
    square.part <- sum(  (zero.indicator * (data.simu$delta / data.simu$GCT)^2 * 
                         (data.simu$IPCW.at.risk.terms * data.simu$Z/nn - (1/nn)*data.simu$IPCW.at.risk.Z.terms)^2))/nn 
    est.eqn.part <- sum(zero.indicator * (data.simu$delta / data.simu$GCT) * 
                          (data.simu$IPCW.at.risk.terms * data.simu$Z/nn - (1/nn)*data.simu$IPCW.at.risk.Z.terms)) / (sqrt(nn))
  } else 
  {
    #return the score   
    square.part <- sum(  (zero.indicator * (data.simu$delta / data.simu$GCT)^2 * 
                         (data.simu$IPCW.at.risk.terms * data.simu$Z * multiplier.wts/nn - (1/nn)*data.simu$IPCW.at.risk.Z.terms)^2))/nn
    est.eqn.part <- sum(zero.indicator * (data.simu$delta / data.simu$GCT) * 
                          (data.simu$IPCW.at.risk.terms * data.simu$Z * multiplier.wts/nn - (1/nn)*data.simu$IPCW.at.risk.Z.terms)) / (sqrt(nn))
    
    
  }
  (1 / sqrt(square.part)) * est.eqn.part
}
vEvalAFTivIPCWScorePrecNormalized <- Vectorize(evalAFTivIPCWScorePrecNormalized, vectorize.args = "beta")
attr(vEvalAFTivIPCWScorePrecNormalized, "name") <- "vEvalAFTivIPCWScorePrecNormalized"
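
## A hedged sketch of one use of the normalized score: since it is scaled by an
## estimate of its standard deviation, a test-inversion confidence interval can
## be formed by retaining the beta values at which |score| <= qnorm(0.975).
## This usage is an assumption based on the normalization, not stated here.
if (FALSE) {
  beta.grid <- seq(-2, 2, by = 0.01)
  z <- vEvalAFTivIPCWScorePrecNormalized(data.simu, beta.grid, GC)
  range(beta.grid[abs(z) <= qnorm(0.975)])
}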