R/stsubpop.R
In stepp: Subpopulation Treatment Effect Pattern Plot (STEPP)

Documented in stepp.subpop

#################################################################
#
# stsubpop.R
#
#######################
# stepp subpopulation #
#######################

# supporting routine to generate subpopulation based just on the coviarate and window
generate.all <- function(sp, win, covariate, coltype = NULL, coltrt = NULL, trts = NULL, minsubpops = NULL) {
  r1 <- win@r1
  r2 <- win@r2
  e1 <- win@e1
  e2 <- win@e2

  if (win@type == "tail-oriented") {
    zvals   <- unique(sort(covariate))
    if (min(r1) < min(zvals) | max(r2) > max(zvals)) stop("Cannot create tail-oriented window")
    # nsubpop <- length(r1) + length(r2) + 1 ### [SV 27.02.2021: shouldn't it be length(r1)+length(r2)?]
    nsubpop <- length(r1) +length(r2)
    npatsub <- rep(0, nsubpop)
    I0      <- rep(1, nsubpop)
    I1      <- rep(length(zvals), nsubpop)
    # if (length(r1) != 0) { ### [SV 27.02.2021: shouldn't it be > 1?]
    if (length(r1) > 1) {
      for (i in 1:length(r1)) {
        npatsub[i] <- sum(covariate <= r1[i])
        sel <- which(zvals <= r1[i])
        I1[i] <- sel[length(sel)]
      }
      npatsub[length(r1) + 1] <- length(covariate)
    }
    # if (length(r2) != 0) { ### [SV 27.02.2021: shouldn't it be > 1?]
    if (length(r2) > 1) {
      npatsub[1] <- length(covariate)
      for (i in 1:length(r2)) {
        k <- i + length(r1)
        npatsub[k] <- sum(covariate >= r2[i])
        I0[k] <- which(zvals >= r2[i])[1]
      }
    }
  } else if (win@type == "sliding") {
    if (r2 >= length(covariate)) {
      errmsg <- "patspop (r2) must be strictly smaller than the number of unique covariate values."
      stop(errmsg)
    }
    zvals <- unique(sort(covariate))
    absfreq <- as.numeric(table(covariate))
    cumfreq <- cumsum(absfreq)

    I0 <- rep(0, 1000)  # max size of I0 and I1 is 1000; these limit the 
    I1 <- rep(0, 1000)  # max no. of stepp subpopulation to 1000
    I0[1] <- 1
    I1[1] <- sum(cumfreq < r2) + 1
    stopflag <- 0
    nsubpop <- 2
    while (stopflag == 0) {
      indinf <- I0[nsubpop - 1] + 1
      while ((cumfreq[I1[nsubpop - 1]] - cumfreq[indinf - 1]) > r1) {
        indinf <- indinf + 1
      }
      I0[nsubpop] <- indinf
      indsup <- I1[nsubpop - 1]
      while (((cumfreq[indsup] - cumfreq[I0[nsubpop]] + absfreq[I0[nsubpop]]) < r2) && (stopflag == 0)) {
        indsup <- indsup + 1
        stopflag <- 1 * (indsup == length(zvals))
      }
      I1[nsubpop] <- indsup
      nsubpop <- nsubpop + 1
    }

    nsubpop    <- nsubpop - 1
    npatsub    <- rep(0,  nsubpop)
    npatsub[1] <- cumfreq[I1[1]]
    for (i in 2:nsubpop) npatsub[i] <- cumfreq[I1[i]] - cumfreq[I0[i] - 1]
    I0 <- I0[1:nsubpop]
    I1 <- I1[1:nsubpop]
  } else if (win@type == "sliding_events") {
    #
    #   create 0/1 treatment assignment 
    #
    txassign <- coltrt
    for (i in 1:length(txassign)) {
      if (txassign[i] != trts[1] & txassign[i] != trts[2]) {
        txassign[i] <- NA
      }
      if (txassign[i] == trts[1]) {
        txassign[i] <- 1
      }
      else if (txassign[i] == trts[2]) {
        txassign[i] <- 0
      }
    }
    #
    # get the overlapping subgroups
    #
    # zvals is the unique values of the covariate that has been sorted at which there has been an event
    zvals <- unique(sort(covariate))
    # absfreq is the total number of events of that covariate value corresponding to zvals
    absfreq <- as.numeric(table(covariate))
    cumfreq <- cumsum(absfreq)
    #
    # Create vector of number of Type 1's recorded for each covariate
    # value for each treatment type
    # 
    type1valsTrt0 <- type1valsTrt1 <- rep(0, length(zvals))

    for (i in 1:length(covariate)) {
      if (coltype[i] == 1) {
        covariateIndices <- which(zvals == covariate[i])
        if (length(covariateIndices) != 1) {
          stop("Non-unique covariate detected in zvals vector or value missing.")
        }
        covariateIndex <- covariateIndices[1]
        #####NEW CODE
        if (txassign[i] == 0) {
          type1valsTrt0[covariateIndex] <- type1valsTrt0[covariateIndex] + 1
        }
        if (txassign[i] == 1) {
          type1valsTrt1[covariateIndex] <- type1valsTrt1[covariateIndex] + 1
        }
      }
    }

    # Create cummulative graph of typevals 
    cumtypevalsTrt0 <- cumsum(type1valsTrt0)
    cumtypevalsTrt1 <- cumsum(type1valsTrt1)
         
    I0 <- I1 <- rep(0, length(zvals))

    #
    # sets the boundaries of the first subpopulation
    #
    I0[1] <- 1

    #
    # Determines the minimum upper boundary that satisfies the value of e2 for both treatment types
    #
    upperTrt0 <- sum(cumtypevalsTrt0 < e2) + 1
    upperTrt1 <- sum(cumtypevalsTrt1 < e2) + 1
    I1[1] <- max(upperTrt0, upperTrt1)

    #
    # Check to see if the upper boundary of the first subpop is greater than
    # the size of the entire population.  This indicates that there are
    # insufficient events to satisfy E2.
    #
    if (I1[1] >= length(covariate)) {
      # obsInitialSubpopCreationFailures <- obsInitialSubpopCreationFailures + 1
      print(paste("Warning: Insufficient Type 1 Events found creating initial subpopulation (",
                  "Trt0=", cumtypevalsTrt0[length(cumtypevalsTrt0)],
                   " Trt1=", cumtypevalsTrt1[length(cumtypevalsTrt1)], ")", sep=""))
      stop("Throwing out dataset.")
    }

    #
    # Find the boundaries of each subpopulation
    #
    stopflag <- 0
    nsubpop <- 2
    while (stopflag == 0) {
      #
      # find the lower boundary of the next subpopulation
      #
      # indinf is the presumptive lower bound of the next subpopulation 
      indinf <- I0[nsubpop - 1] + 1

      while (((cumtypevalsTrt0[I1[nsubpop - 1]] - cumtypevalsTrt0[indinf - 1]) > e1) ||
        ((cumtypevalsTrt1[I1[nsubpop - 1]] - cumtypevalsTrt1[indinf - 1]) > e1)) {
        indinf <- indinf + 1
      }
      I0[nsubpop] <- indinf
      indsup <- I1[nsubpop - 1]
      #
      # find the upper boundary of the next subpopulation
      #
      while ((((cumtypevalsTrt0[indsup] - cumtypevalsTrt0[I0[nsubpop]] + type1valsTrt0[I0[nsubpop]]) < e2) ||
        ((cumtypevalsTrt1[indsup] - cumtypevalsTrt1[I0[nsubpop]] + type1valsTrt1[I0[nsubpop]]) < e2)) &&
        (stopflag == 0)) {
        indsup <- indsup + 1
        stopflag <- (indsup == length(zvals))
      }
      I1[nsubpop] <- indsup
      # increment total number of subpopulations for next loop iteration
      nsubpop <- nsubpop + 1
    }
    # decrement nsubpop to number of found subpopulations
    nsubpop <- nsubpop - 1

    #
    # If the last subpopulation is too small for either treatment arm (< e2)
    # include it in the previous subpopulation so that e2 is not violated
    #
    if (((cumtypevalsTrt0[I1[nsubpop]] - cumtypevalsTrt0[I0[nsubpop] - 1]) < e2) ||
      ((cumtypevalsTrt1[I1[nsubpop]] - cumtypevalsTrt1[I0[nsubpop] - 1]) < e2)) {
      I1[nsubpop - 1] <- I1[nsubpop]
      nsubpop <- nsubpop - 1
    }

    #
    # Make sure there are enough subpopulations
    #
    if (nsubpop < minsubpops) {
      # obsInsufficientSubpopsErrors <- obsInsufficientSubpopsErrors + 1
      print(paste("Warning: too few subpopulations (", nsubpop, ")", sep = ""))
      stop("Throwing out dataset.")
    }

    npatsub <- rep(0, nsubpop)
    npatsub[1] <- cumfreq[I1[1]]
    for (i in 2:nsubpop) npatsub[i] <- cumfreq[I1[i]] - cumfreq[I0[i] - 1]  
    neventsubTrt0 <- neventsubTrt1 <- rep(0, nsubpop)
    neventsubTrt0[1] <- cumtypevalsTrt0[I1[1]]
    neventsubTrt1[1] <- cumtypevalsTrt1[I1[1]]
    for (i in 2:nsubpop) {
      neventsubTrt0[i] <- cumtypevalsTrt0[I1[i]] - cumtypevalsTrt0[I0[i] - 1]
      neventsubTrt1[i] <- cumtypevalsTrt1[I1[i]] - cumtypevalsTrt1[I0[i] - 1]
    }
    I0 <- I0[1:nsubpop]
    I1 <- I1[1:nsubpop]
  }

  medians <- rep(0,  nsubpop)
  minc    <- rep(NA, nsubpop)
  maxc    <- rep(NA, nsubpop)
  npats   <- length(covariate)
  subpop  <- matrix(rep(0, (npats*nsubpop)), ncol = nsubpop)
  for (i in 1:nsubpop) {
    subpop[, i] <- (covariate >= zvals[I0[i]]) * (covariate <= zvals[I1[i]])
    medians[i]  <- round((median(covariate[subpop[, i] == 1])), digits = 2)
    minc[i]     <- zvals[I0[i]]
    maxc[i]     <- zvals[I1[i]]
  }

  # update the object
  sp@win <- win
  sp@covar <- covariate
  sp@nsubpop <- nsubpop
  sp@subpop <- subpop
  sp@npatsub <- npatsub
  sp@medianz <- medians
  sp@minc <- minc
  sp@maxc <- maxc
  if (sp@win@type == "sliding_events") {
    sp@neventsubTrt0 <- neventsubTrt0
    sp@neventsubTrt1 <- neventsubTrt1
  } else {
    sp@neventsubTrt0 <- NULL
    sp@neventsubTrt1 <- NULL
  }
  sp@init <- TRUE

  return(sp)
}

setClass("stsubpop",
  representation(
    win      = "stwin",   # stepp window object
    covar   = "numeric", # vector of covariate of interest (V) 
    nsubpop  = "numeric", # number of subpopulation generated
    subpop   = "ANY",     # matrix of subpopulations
    npatsub  = "numeric", # count of each subpopulation
    medianz  = "numeric", # median of V for each subpopulation
    minc     = "numeric", # minimum of V for each subpopulation, actual
    maxc     = "numeric", # maximum of V for each subpopulation, actual
    neventsubTrt0 = "null_or_numeric",
    neventsubTrt1 = "null_or_numeric",
    init     = "logical"  # initialized
    )
)

setMethod("initialize", "stsubpop",
  function(.Object) {
    .Object@init <- FALSE
    return(.Object)
  }
)

setValidity("stsubpop", 
  function(object) {
    if (!is.numeric(object@covar) || length(object@covar) < object@win@r2) {
      print("Invalid argument.")
      return (FALSE)
    }
    return(TRUE)
  }
)

setGeneric("generate", function(.Object, win, covariate, coltype, coltrt, trts, minsubpops)
  standardGeneric("generate"))  

setMethod("generate", 
  signature="stsubpop",
  definition = function(.Object, win, covariate, coltype, coltrt, trts, minsubpops) {
    if (missing(minsubpops)) {
      minsubpops <- 2
    }
    .Object <- generate.all(.Object, win, covariate, coltype, coltrt, trts, minsubpops)
    return(.Object)
  }
)

setGeneric("merge", function(.Object, mergevector)
  standardGeneric("merge"))

setMethod("merge", 
  signature = "stsubpop",
  definition = function(.Object, mergevector) {
    subp.new <- new("stsubpop")

    if (.Object@nsubpop != length(mergevector)) stop("invalid merge vector length.")
    if (.Object@nsubpop == 1) {
      # if there is only one subpopulation left, there is nothing to merge
      subpop.new <- .Object
    } else {
      v1 <- c(mergevector[1], mergevector[-length(mergevector)])
      run <- c(1, which(v1 != mergevector), length(mergevector) + 1)
      merge <- mergevector[1]
    
      subpop.matrix <- NULL

      nsubpop.new <- 0
      for (i in 1:(length(run) - 1)) {
        beg <- run[i]
        end <- run[i + 1] - 1
        if (merge & (beg != end)) {
          subpop.matrix <- cbind(subpop.matrix, apply(.Object@subpop[, beg:end], 1, max))
          nsubpop.new <- nsubpop.new + 1
        } else {
          subpop.matrix <- cbind(subpop.matrix, .Object@subpop[, beg:end])
          nsubpop.new <- nsubpop.new + end - beg + 1
        }
        merge <- !merge
      }

      medianz <- rep(0, nsubpop.new)
      minc    <- rep(0, nsubpop.new)
      maxc    <- rep(0, nsubpop.new)

      covariate <- .Object@covar
      for (i in 1:nsubpop.new) {
        subpop.cov <- covariate[subpop.matrix[, i] == 1]
        medianz[i] <- round((median(subpop.cov)), digits = 2)
        minc[i]    <- min(subpop.cov)
        maxc[i]    <- max(subpop.cov)
      }

      subp.new@win     <- .Object@win
      subp.new@covar  <- .Object@covar
      subp.new@nsubpop <- nsubpop.new
      subp.new@npatsub <- apply(subpop.matrix,2,sum)
      subp.new@subpop  <- subpop.matrix
      subp.new@medianz <- medianz
      subp.new@minc    <- minc
      subp.new@maxc    <- maxc
      subp.new@neventsubTrt0 <- .Object@neventsubTrt0
      subp.new@neventsubTrt1 <- .Object@neventsubTrt1
      subp.new@init    <- TRUE
    }
    return(subp.new)
  }
)

setGeneric("edge", function(.Object, j, side)
  standardGeneric("edge"))  

setMethod("edge", 
  signature = "stsubpop",
  definition = function(.Object, j, side) {
    if (side=="L" & j ==1) stop("invalid arg j.")

    # sort the covariate value
    covariate <- .Object@covar
    Z <- sort(covariate)
    ZU <- unique(Z)

    if (side == "L") {
      left.edge  <- .Object@minc[j-1]
      left.i   <- (which(ZU == left.edge))[1]
      rside  <- which(ZU == .Object@maxc[j-1])
      right.i    <- rside[length(rside)]
      n      <- min(right.i - left.i + 1, length(ZU)-right.i+1)
      start      <- left.i
    } else if (side == "R") {
      right.edge <- .Object@maxc[j+1]
      rside  <- which(ZU == right.edge)
      right.i  <- rside[length(rside)]
      left.i   <- (which(ZU == .Object@minc[j+1]))[1]
      n      <- min(right.i-left.i+1,left.i)
      start  <- left.i-n+1
    } else {
      stop("unknown side.")
    }

    # create a special stepp subpopulation just for this edge
    subpop.matrix <- matrix(rep(0, (length(covariate) * n)), ncol = n)
    medianz <- rep(0, n)
    minc    <- rep(0, n)
    maxc    <- rep(0, n)

    # print(paste(side, "j=",j, "start=",start, "n=",n,ZU[start], ZU[start+n-1]))
    # print(ZU)
    
    for (i in 1:n) {
      # print(c(i, ZU[start + i - 1], ZU[start + i + n - 1]))
      subpop.matrix[,i ] <- (covariate >= ZU[start + i - 1]) * (covariate <= ZU[start + i+n - 2])

      subpop.cov <- covariate[subpop.matrix[, i] == 1]
      medianz[i] <- round((median(subpop.cov)), digits = 2)
      minc[i]    <- min(subpop.cov)
      maxc[i]    <- max(subpop.cov)
    }

    subp.new         <- new("stsubpop")
    subp.new@win     <- .Object@win
    subp.new@covar  <- .Object@covar
    subp.new@nsubpop <- n
    subp.new@subpop  <- subpop.matrix
    subp.new@npatsub <- apply(subpop.matrix,2,sum)
    subp.new@medianz <- medianz
    subp.new@minc    <- minc
    subp.new@maxc    <- maxc
    subp.new@neventsubTrt0 <- .Object@neventsubTrt0
    subp.new@neventsubTrt1 <- .Object@neventsubTrt1
    subp.new@init    <- TRUE
    
    return(subp.new)
  }
)

setMethod("summary", 
  signature = "stsubpop",
  definition = function(object) {
    summary(object@win)
    if (object@init) {
      if (object@win@type == "sliding_events") {
        write(paste("Number of subpopulations created:", object@nsubpop), file = "")
        cat("\n")
        write("Subpopulation summary information", file = "")
        nper <- apply(object@subpop, 2, sum)
        temp <- matrix(c(1:object@nsubpop, object@medianz, round(object@minc, digits = 4),
          round(object@maxc, digits = 4), nper, object@neventsubTrt0, object@neventsubTrt1), ncol = 7)
        write("                         Covariate Summary               Sample        Type 1 Events", file = "")
        write(" Subpopulation    Median      Minimum      Maximum         Size  Trt Group 1   Trt Group 2", file = "")
        for (i in 1:object@nsubpop) {
          write(paste(format(temp[i, 1], width = 8), format(temp[i, 2], width = 15, nsmall = 2),
            format(temp[i, 3], width = 12, nsmall = 4), format(temp[i, 4], width = 12, nsmall = 4),
            format(temp[i, 5], width = 12), format(temp[i, 6], width = 12),
            format(temp[i, 7], width = 13)), file = "")
        }
      } else {
        write(paste("Number of subpopulations created:", object@nsubpop), file = "")
        cat("\n")
        write("Subpopulation summary information (including all treatments)", file = "")
        nper <- apply(object@subpop, 2 ,sum)
        temp <- matrix(c(1:object@nsubpop, object@medianz, round(object@minc, digits = 4),
          round(object@maxc, digits = 4), nper), ncol = 5)
        write("                                  Covariate Summary                 Sample", file = "")
        write("     Subpopulation        Median       Minimum       Maximum          size", file = "")
        for (i in 1:object@nsubpop) {
          if (object@win@type == "tail-oriented" & i == length(object@win@r1) + 1 & length(object@win@r1) > 1) {
            write(paste(format(temp[i, 1], width = 12), format(temp[i, 2], width = 19, nsmall = 2),
              format(temp[i, 3], width = 13, nsmall = 4), format(temp[i, 4], width = 13, nsmall = 4),
              format(temp[i, 5], width = 13), "(entire cohort)"), file = "")
          } else if (object@win@type == "tail-oriented" & i == 1 & length(object@win@r2) > 1) {
            write(paste(format(temp[i, 1], width = 12), format(temp[i, 2], width = 19, nsmall = 2),
              format(temp[i, 3], width = 13, nsmall = 4), format(temp[i, 4], width = 13, nsmall = 4),
              format(temp[i, 5], width = 13), "(entire cohort)"), file = "")
          } else {
            write(paste(format(temp[i, 1], width = 12), format(temp[i, 2], width = 19, nsmall = 2),
              format(temp[i, 3], width = 13, nsmall = 4), format(temp[i, 4], width = 13, nsmall = 4),
              format(temp[i, 5], width = 13)), file = "")
          }
        }
      }
    } else {
      write("Subpopulations have not been generated yet.", file = "")
    }
  }
)

# constructor and accessor functions for stepp subpopulation
# 1. constructor
stepp.subpop <- function(swin, cov, coltype = NULL, coltrt = NULL, trts = NULL, minsubpops = NULL) {
  subp <- new("stsubpop")    
  subp <- generate(subp, win = swin, covariate = cov, coltype = coltype, coltrt = coltrt, trts = trts,
    minsubpops = minsubpops)
  return(subp)
}

# 2. merge the subpopulation according to the merge vector mv
stepp.merge.subpop <- function(subpop, mv) {
  subp.new <- merge(subpop, mv)
  return(subp.new)
}

#3. create the edge subpopulation for the "L" or "R" side of the jth subgroup
stepp.edge.subpop <- function(subpop, j, side) {
  subp.new <- edge(subpop, j, side)
  return(subp.new)
}