R/rss.AUC.test.R

Defines functions el.auc v01 v10 v01fun v10fun llogpp llogp llog logelr el.rss.test rss.AUC.test

Documented in rss.AUC.test

#' @details This function performs an empirical likelihood ratio test to compare the Area Under the Curve (AUC) between two groups using Ranked Set Sampling (RSS). The test is equivalent to the Mann-Whitney U test, which evaluates whether there is a significant difference between the distributions of two groups. The function supports both Balanced RSS (BRSS) and unbalanced RSS (URSS), as described by Moon et al. (2022). Given two data frames of RSS data (`data1` and `data2`) with `rank` and `y` columns, the function calculates the empirical likelihood ratio test statistic, confidence interval, and p-value based on the hypothesized AUC value `delta0`. Test for `delta0`=0.5 corresponds to the null hypothesis of the Mann-Whitney U test, which asserts that there is no difference between the two groups, implying that any randomly selected observation from one group is just as likely to be greater or smaller than an observation from the other group.
#' @title RSS empirical likelihood ratio (ELR) test in two-sample comparison
#' @name rss.AUC.test
#' @description The rss.AUC.test function conducts an empirical likelihood ratio test to compare the Area Under the Curve (AUC) between two groups using RSS. It supports both balanced and unbalanced RSS designs.
#'
#' @param data1 A numeric data frame of ranked set samples with columns `rank` for ranks and `y` for data values.
#' @param data2 A numeric data frame of ranked set samples with columns `rank` for ranks and `y` for data values.
#' @param alpha A numeric value specifying the confidence level for the interval.
#' @param delta0 A numeric value indicating the hypothesized value of the AUC.
#'
#' @return
#'   \item{RSS_AUC}{The RSS AUC estimate.}
#'   \item{CI}{The confidence interval for the AUC.}
#'   \item{-2*Log.LR}{The empirical log likelihood ratio test statistics.}
#'   \item{p.value}{The p-value for the test.}
#'
#' @references
#'
#' C. Moon, X. Wang, and J. Lim. (2022) Empirical likelihood inference for area under the receiver operating characteristic curve using ranked set samples. Pharmaceutical Statistics, 21(6), 1219–1245.
#'
#' @seealso
#' \code{\link{rss.simulation}}: used for simulating Ranked Set Samples (RSS), which can serve as input.
#'
#' \code{\link{rss.sampling}}: used for sampling Ranked Set Samples (RSS) from a population data set, providing input data.
#'
#' @examples
#' ## balanced RSS with a set size 3 and different sample sizes of 6 or 8 for each stratum,
#' ## using imperfect ranking from a normal distribution with a mean of 0.
#' rss.data1<-rss.simulation(H=3,nsamp=c(6,6,6),dist="normal", rho=0.8,delta=0)
#' rss.data2<-rss.simulation(H=3,nsamp=c(8,8,8),dist="normal", rho=0.8,delta=0.2)
#'
#' ## RSS empirical likelihood ratio test
#' rss.AUC.test(data1=rss.data1, data2=rss.data2, alpha=0.05, delta0=0.5)
#'
#' ## Unbalanced RSS with a set size 3 and different sample sizes of 6, 10, and 8 for each stratum,
#' ## using imperfect ranking from a normal distribution with a mean of 0.
#' rss.data1<-rss.simulation(H=3,nsamp=c(6,10,8),dist="normal", rho=0.8,delta=0)
#' rss.data2<-rss.simulation(H=3,nsamp=c(6,10,8),dist="normal", rho=0.8,delta=0.2)
#'
#' ## RSS empirical likelihood ratio test
#' rss.AUC.test(data1=rss.data1, data2=rss.data2, alpha=0.05, delta0=0.5)
#'
#' @export
#'
#'

rss.AUC.test <- function(data1, data2, alpha=0.05, delta0=0.5){
  if( (alpha > 0) & (alpha < 1)){

    if(!all(c("rank", "y") %in% colnames(data1)) | !all(c("rank", "y") %in% colnames(data2))) {
      stop("The input data must contain 'rank' and 'y' variables.")
    }

    m = length(unique(data1$rank))
    ki = table(data1$rank)
    n = length(unique(data2$rank))
    lr = table(data2$rank)
    nx = sum(ki) ; ny = sum(lr)
    rssx = split(data1$y, data1$rank)
    rssy = split(data2$y, data2$rank)

    delhat = 0
    delhatx = rep(NA,length(rssx))
    delhaty = rep(NA,length(rssy))
    for (ii in 1:length(rssx)){
      delhatxij = rep(NA,length(rssx[[ii]]))
      for (jj in 1:length(rssx[[ii]])){
        for (rr in 1:length(rssy)) {
          delhaty[rr]=mean(rssy[[rr]]>=rssx[[ii]][jj])
        }
        delhatxij[jj]=mean(delhaty)
      }
      delhatx[ii]=mean(delhatxij)
    }
    delhat=mean(delhatx)

    OU = list()
    for (rr in 1:length(rssy)) {
      OU[[rr]] = apply(as.matrix(rssy[[rr]]),1,v01,rssx)
    }

    OUDsq.temp = rep(NA,length(rssy))
    for (rr in 1:length(rssy)) {
      OUDsq.temp[rr] = mean((apply(as.matrix(rssy[[rr]]),1,v01,rssx)-delhat)^2)
    }
    OUDsq = mean(OUDsq.temp)

    v10i.bar=function(rssx,rssy,i){
      return(mean(apply(as.matrix(rssx[[i]]),1,v10,rssy)))
    }

    v01r.bar=function(rssx,rssy,r){
      return(mean(apply(as.matrix(rssy[[r]]),1,v01,rssx)))
    }

    s10i.sq=function(rssx,rssy,i,ki){
      v10val=apply(as.matrix(rssx[[i]]),1,v10,rssy)
      return(sum( (v10val-v10i.bar(rssx,rssy,i))^2 )/(ki[i]-1) )
    }

    s01r.sq=function(rssx,rssy,r,lr){
      v01val=apply(as.matrix(rssy[[r]]),1,v01,rssx)
      return(sum( (v01val-v01r.bar(rssx,rssy,r))^2 )/(lr[r]-1) )
    }

    s10.sq.vec = rep(NA,m)
    for (ii in 1:m) {
      s10.sq.vec[ii] = s10i.sq(rssx,rssy,ii,ki)
    }
    s10.sq = mean(s10.sq.vec)

    s01.sq.vec = rep(NA,n)
    for (rr in 1:n) {
      s01.sq.vec[rr] = s01r.sq(rssx,rssy,rr,lr)
    }
    s01.sq = mean(s01.sq.vec)

    S.sq = (ny*s10.sq+nx*s01.sq)/(nx+ny)

    if(S.sq==0){
      warning("Too small sample allocations, leading to NA confidence interval.")

      CI.low = NA
      CI.up = NA
    }else{
      r.adj = (nx)/(ny+nx)*OUDsq/(S.sq)
      rss.el.res = try(emplik::findUL2(step=0.005, fun=el.auc, MLE=delhat, x=OU, r.adj=r.adj))
      CI.low = rss.el.res$Low
      CI.up = rss.el.res$Up
    }

    LLR = el.rss.test(ou=OU, mu=delta0)$'-2LLR'
    LLR = LLR *r.adj
    pval = 1-stats::pchisq(LLR, df=1)

    result <- list(RSS_AUC = delhat, CI = c(CI.low, CI.up), "-2*log.LR" = LLR, p.value = pval)
    return(result)

  }else stop("alpha is out of bound.", call. = F)
}


#' @noRd
el.rss.test <- function(ou, mu, lam, maxit=25, gradtol=1e-7, svdtol = 1e-9, itertrace=FALSE ){
  x <- as.matrix(unlist(ou))
  xn <- nrow(x)
  xp <- ncol(x)
  mu <- as.vector(mu)

  if( length(mu) !=xp )
    stop("mu must have same dimension as observation vectors.")
  if( xn <= xp )
    stop("Need more observations than length(mu) in el.test().")

  sou = list() # standardized OU
  for (ii in 1:length(ou)){
    sou[[ii]]=(ou[[ii]]-mu)/length(ou[[ii]])
  }
  z = as.matrix(unlist(sou))

  TINY <- sqrt( .Machine$double.xmin )
  scale <- mean( abs(z) ) + TINY
  z <- z/scale
  if( !missing(lam) ){
    lam <- as.vector(lam)
    lam <- lam*scale
    if( logelr(z,rep(0,xp),lam)>0 )lam <- rep(0,xp)
  }
  if(  missing(lam)  )
    lam <- rep(0,xp)

  if( svdtol < TINY )svdtol <- TINY
  if( gradtol < TINY)gradtol <- TINY

  nwts <- c( 3^-c(0:3), rep(0,12) )
  gwts <- 2^( -c(0:(length(nwts)-1)))
  gwts <- (gwts^2 - nwts^2)^.5
  gwts[12:16] <- gwts[12:16] * 10^-c(1:5)

  nits <- 0
  gsize <- gradtol + 1
  while(  nits<maxit && gsize > gradtol  ){
    arg  <- 1 + z %*% lam
    wts1 <- as.vector( llogp(arg, 1/xn) )
    wts2 <- as.vector( -llogpp(arg, 1/xn) )^.5
    grad <- as.matrix( -z*wts1 )
    grad <- as.vector(rowsum(grad, rep(1, nrow(grad)) ) )
    gsize <- mean( abs(grad) )
    hess <- z*wts2

    svdh <- svd( hess )
    if( min(svdh$d) < max(svdh$d)*svdtol )
      svdh$d <- svdh$d + max(svdh$d)*svdtol
    nstep <- svdh$v %*% (t(svdh$u)/svdh$d)
    nstep <- as.vector( nstep %*% matrix(wts1/wts2,xn,1) )

    gstep <- -grad
    if(  sum(nstep^2) < sum(gstep^2) )
      gstep <- gstep*(sum(nstep^2)^.5/sum(gstep^2)^.5)
    ologelr <- -sum( llog(arg,1/xn) )
    ninner <- 0
    for(  i in 1:length(nwts) ){
      nlogelr <- logelr( z,rep(0,xp),lam+nwts[i]*nstep+gwts[i]*gstep )
      if( nlogelr < ologelr ){
        lam <- lam+nwts[i]*nstep+gwts[i]*gstep
        ninner <- i
        break
      }
    }
    nits <- nits+1
    if(  ninner==0  )nits <- maxit
    if( itertrace )
      print( c(lam, nlogelr, gsize, ninner) )
  }

  list( "-2LLR" = -2*nlogelr, Pval = 1-stats::pchisq(-2*nlogelr, df=xp),
        lambda = lam/scale, grad=grad*scale,
        hess=t(hess)%*%hess*scale^2, wts=wts1, nits=nits )
}

logelr <- function(x,mu,lam){
  x <- as.matrix(x)
  xn <- nrow(x)
  xp <- ncol(x)
  if(  xn <= xp  )
    stop("Need more observations than variables in logelr.")
  mu <- as.vector(mu)
  if(  length(mu) != xp  )
    stop("Length of mean doesn't match number of variables in logelr.")

  z <- t( t(x) -mu )
  arg <- 1 + z %*% lam
  return( - sum( llog(arg,1/xn) ) )
}

llog <- function( z, eps ){
  ans <- z
  avoidNA <- !is.na(z)
  lo <- (z<eps) & avoidNA
  ans[ lo  ] <- log(eps) - 1.5 + 2*z[lo]/eps - 0.5*(z[lo]/eps)^2
  ans[ !lo ] <- log( z[!lo] )
  ans
}

llogp <- function( z, eps ){
  ans <- z
  avoidNA <- !is.na(z)
  lo <- (z<eps) & avoidNA
  ans[ lo  ] <- 2.0/eps - z[lo]/eps^2
  ans[ !lo ] <- 1/z[!lo]
  ans
}

llogpp <- function( z, eps ){
  ans <- z
  avoidNA <- !is.na(z)
  lo <- (z<eps) & avoidNA
  ans[ lo  ] <- -1.0/eps^2
  ans[ !lo ] <- -1.0/z[!lo]^2
  ans
}


v10fun = function(yvec,xval){
  mean(yvec>=xval)
}
v01fun = function(xvec,yval){
  mean(yval>=xvec)
}

v10 = function(xij,rssy) {
  mean(unlist(lapply(rssy,v10fun,xij)))
}

v01 = function(yrs,rssx) {
  mean(unlist(lapply(rssx,v01fun,yrs)))
}

el.auc <- function(mu,x,r.adj) {
  elres = el.rss.test(ou=x,mu=mu)
  # adjust log EL
  elres$"-2LLR" = elres$"-2LLR"*r.adj
  return(elres)
}

Try the generalRSS package in your browser

Any scripts or data that you put into this service are public.

generalRSS documentation built on April 4, 2025, 12:19 a.m.