R/gtcorr.r
In gtcorr: Calculate efficiencies of group testing algorithms with correlated responses

gtcorr.hierarchical <- function(n, m=1, p, sigma=0, se=1, sp=1, arrangement=c("nested", "random"), model=c('beta-binomial', 'Madsen', 'Morel-Neerchal'), ...) {
  if (any(c(p, sigma, se, sp)<0) || any(c(p, sigma, se, sp) >1)) {
    stop("p, sigma, se, and sp should be between 0 and 1")
  }
  if (n[length(n)] != 1) {
    n <- c(n,1)
  }
  if (length(n)==1) {
    stop("The largest pool size should be greater than 1")
  }
  if (!all(n==as.integer(n))) {
    stop("All elements of n should be integers")
  }
  if (!all(0==n[-length(n)]%%n[-1])) {
    stop("n[s-1] should be divisible by n[s] for all s in 2:length(n)")
  }
  if (any(m>n[1])) {
    stop("m should not be greater than n[1]")
  }
  if (sum(c(length(m),
            length(p),
            length(sigma),
            length(se),
            length(sp))>1)>1) {
    stop("Only one of m, p, sigma, se, and sp can have a length > 1")
  }
  h <- length(n)
  model=match.arg(model)
  arrangement=match.arg(arrangement)
   
  w <- n[1]/n
  
  param.grid <- expand.grid(p=p, sigma=sigma, se=se, sp=sp, m=m)
  eff <- mapply(gtcorr.h.internal,
                p=param.grid$p,
                sigma=param.grid$sigma,
                se=param.grid$se,
                sp=param.grid$sp,
                m=param.grid$m,
                MoreArgs=list(n=n,
                              h=h,
                              w=w,
                              model,
                              arrangement,
                              ...)
                )
  
  result=list(n=n, param.grid=param.grid, arrangement=arrangement, model=model, efficiency=eff)
  result
}


gtcorr.h.internal <- function(p, sigma, se, sp, n, m, h, w, model, arrangement, ...) {

  #Calculate pr(V^T_s1=0) and pr(V^T_s1=1)
  prob.neg <- get(paste("prob.neg.h.", arrangement, sep=""))
  pr.v.t.0 <- prob.neg(p, sigma, n, m, h, w, model, ...)
  #Probabilities that a pool has at least one positive unit for each stage
  pr.v.t.1 <- 1 - pr.v.t.0

  #Calculate pr(V^T_(s-1)=u|V^T_s=0) for s=2, ..., h-1
  #(pr(true state of parent pool given true state of child))
  pr.par.t.giv.ch.t.0 <- list()
  #pr.par.t.giv.ch.t[[s]][u]=pr(V^T_(s-1)1=u|V^T_s1=0)
  #first element [[1]] is NULL
  if (h > 2) {
    for (s in 2:(h-1)) {
      pr.par.t.giv.ch.t.0[[s]] <- rep(NA, 2)
      pr.par.t.giv.ch.t.0[[s]][1] <- pr.v.t.0[s-1]/pr.v.t.0[s]
      pr.par.t.giv.ch.t.0[[s]][2] <- (pr.v.t.1[s-1]-pr.v.t.1[s])/pr.v.t.0[s]
    }
  }

  #Calculate pr(V_(s-1)=1|V^T_s=v) and pr(V_(s-1)=1|V^T_(s-1)=u
  pr.par.1.giv.ch.t <- list()
  #pr.par.1.giv.ch.t[[s]][v+1]=pr(V_(s-1)=1|V^T_s=v)

  if (h > 2) {
    pr.par.1.giv.ch.t <- list()
    pr.par.1.giv.ch.t[[2]] <- rep(NA, 2)
    pr.par.1.giv.ch.t[[2]][1] <- se    *pr.par.t.giv.ch.t.0[[2]][2]+
                                 (1-sp)*pr.par.t.giv.ch.t.0[[2]][1]
    pr.par.1.giv.ch.t[[2]][2] <- se
  }
  
  if (h > 3) {
    for (s in 3:(h-1)) {
      pr.par.1.giv.ch.t[[s]] <- rep(NA, 2)
      pr.par.1.giv.ch.t[[s]][1] <- se    *pr.par.1.giv.ch.t[[s-1]][2]*pr.par.t.giv.ch.t.0[[s]][2]+
                                   (1-sp)*pr.par.1.giv.ch.t[[s-1]][1]*pr.par.t.giv.ch.t.0[[s]][1]
      pr.par.1.giv.ch.t[[s]][2] <- se*pr.par.1.giv.ch.t[[s-1]][2]
    }
  }

  #Calculate pr(V_s=1)
  pr.v.1 <- rep(NA, h-1)
  pr.v.1[1] <- se*pr.v.t.1[1]+(1-sp)*pr.v.t.0[1]
  if (h > 2) {
    for (s in 2:(h-1)) {
      pr.v.1[s] <- se    *pr.par.1.giv.ch.t[[s]][2]*pr.v.t.1[s] +
                   (1-sp)*pr.par.1.giv.ch.t[[s]][1]*pr.v.t.0[s]
    }
  }

  et <- c(1, w[2:(h)]*pr.v.1[1:(h-1)])

  sum(et)/n[1]
}

prob.neg.h.nested <- function(p, sigma, n, m, h, w, model, ...) {
  #returns a vector of length h-1 where the sth element is the probability that a pool at
  #stage s has no positive units
  h.prime <- which.max(n<=m)
  
  if (c(!all(n[1:(h.prime-1)]%%m==0, m%%n[h.prime:h]==0))) {
    stop(paste("n[s] should be divisible by m for s < h' and n[s] should be divisible by m for s>=h'. h'=",
               h.prime, " in this case", sep=""))
  }

  #Vector of probabilities that a pool has no positive units for each stage
  #pr.v.t.0 <- rep(NA, h)  
  pr.v.t.0 <- rep(NA, h-1)
  
  
  #Probability that a cluster has no positive units for cluster sizes up to m
  q <- sapply(1:m, function(m) prob.neg.clust(p, sigma, m, model))
  
  for (s in 1:(h.prime-1)) {
    pr.v.t.0[s] <- q[m]^(n[s]/m)
  }
#  for (s in h.prime:h) {
#    pr.v.t.0[s] <- q[n[s]]
#  }  
  if (h.prime <= h-1) {
    for (s in h.prime:(h-1)) {
      pr.v.t.0[s] <- q[n[s]]
    }
  }
  
  pr.v.t.0
}

prob.neg.h.random <- function(p, sigma, n, m, h, w, model, ...) {
  #returns a vector of length h where the sth element is the probability that a pool at
  #stage s has no positive units
  pr.v.t.0 <- rep(NA, h-1)

  for (s in 1:(h-1)) {
    pr.v.t.0[s] <- prob.neg.pool.rand(p=p, sigma=sigma, m=m, size=n[s], k=n[1]/m, model=model, ...)
  }
  pr.v.t.0
}

gtcorr.hierarchical.user <- function(n,clusters,p,sigma=0,se=1,sp=1, model=c("beta-binomial", "Morel-Neerchal", "Madsen")){
  if (any(c(p, sigma, se, sp)<0) || any(c(p, sigma, se, sp) >1)) {
    stop("p, sigma, se, and sp should be between 0 and 1")
  }
  if (n[length(n)] != 1) {
    n <- c(n,1)
  }
  if (length(n)==1) {
    stop("The largest pool size should be greater than 1")
  }
  if (!all(n==as.integer(n))) {
    stop("All elements of n should be integers")
  }
  if (!all(0==n[-length(n)]%%n[-1])) {
    stop("n[s-1] should be divisible by n[s] for all s in 2:length(n)")
  }

  bad <- FALSE
  l <- max(clusters)
  if (l != as.integer(l)) bad <- TRUE
  if (l < 1) bad <- TRUE
  for (i in 1:l) {
    if (!(i %in% clusters)) bad <- TRUE
  }
  if (bad) stop("clusters should be a vector of integers from 1 to the largest cluster number")

  if (length(clusters) != n[1]) stop("clusters should have length n[1]")

  if (length(p)==1) p <- rep(p, l)
  if (length(p)!=l) stop("p should have length 1 or the largest cluster number")
  if (length(sigma)==1) sigma <- rep(sigma, l)
  if (length(sigma)!=l) stop("sigma should have length 1 or the largest cluster number")

  model <- match.arg(model)
  
  h <- length(n)	# total number of stages (scalar)
  n1 <- length(clusters)# total number of units (scalar)
  w <- n1/n		# total number of possible pools in stage s (vector)
  et <- 1		# expected number of tests at stage 1
  for (ss in 2:h){	# cycle through stages. see Section 3.1 of revision.
    summ <- 0
    for (ii in 1:w[ss-1]) summ <- summ + gtcorr.h.prv(ii,ss-1,n,clusters,p,se,sp,sigma, model)
    etss <- w[ss]/w[ss-1]*summ
    et <- et + etss	# add number of tests expected at stage ss
  }
  eff <- et/n1	# final efficiency 
}

gtcorr.h.prv <- function(ii,ss,n,M,prev,se,sp,sigma, model){
		# prob pool ii in stage ss tests positive. cf equation (4) and (5) of revision
  ans.prv <- 0
  if (ss==1){
  # cf equation (4) revision
    prv1iteq1 <- gtcorr.h.prvsiteq1(ii,1,n,M,prev,sigma, model)
    ans.prv <- se*prv1iteq1 + (1-sp)*(1-prv1iteq1)
  }
  if (ss>1){		
    # cf equation (5) of revision
    pt <- gtcorr.h.prvsiteq1(ii,ss,n,M,prev,sigma, model)
    ps <- c(1-pt,pt)
    for (vv in 0:1) {
      ans.prv <- ans.prv + se^vv * (1-sp)^(1-vv) * gtcorr.h.cp1(vv,ii,ss,n,M,prev,se,sp,sigma, model) * ps[vv+1]
    }
  }
  ans.prv
}	

gtcorr.h.prvsiteq1 <- function(ii,ss,n,M,prev,sigma, model){	# equation (3) revision
  # probability pool ii in stage ss is truly positive
  # prev vector (of length l) of cluster specific prevalences
  # msik vector (of length l) with number of units from cluster k in pool ii in stage ss
  l <- length(prev)
  msik <- matrix(NA,1,l)
  lower <- n[ss]*(ii-1)+1	# first unit in pool ii in stage ss
  upper <- lower+n[ss]-1
  index <- lower:upper # units in pool ii in stage ss
  for (kk in 1:l) msik[kk] <- sum(M[index]==kk)
  myprod <- 1
  for (kk in 1:l){
    myprod <- myprod*prob.neg.clust(prev[kk], sigma[kk], msik[kk], model)
  }
  ans <- 1-myprod
  ans
}

gtcorr.h.cp1 <- function(vv,ii,ss,n,M,prev,se,sp,sigma, model) {
  # conditional probability in equation (5) of revision
  # uses recursion
  if (ss==2){		# eq (5.1) of revision
    if (vv==1) ans <- se
    if (vv==0){
      ans <- 0
      for (uu in 0:1) ans <- ans + se^uu * (1-sp)^(1-uu) * gtcorr.h.cpt(uu,ii,ss,n,M,prev,sigma, model)
        # here cpt is conditional prob on right side of equation (5.1) of revision
    }  
  }
  else{			# eq (5.2) of revision
    iim1 <- ceiling(ii/(n[ss-1]/n[ss]))	# pool in stage ss-1 containing units from pool ii in stage ss
    if (vv==1) ans <- se*gtcorr.h.cp1(1,iim1,ss-1,n,M,prev,se,sp,sigma, model)	# recursion
    if (vv==0){
      ans <- 0
      for (uu in 0:1) ans <- ans + se^uu * (1-sp)^(1-uu) * gtcorr.h.cpt(uu,ii,ss,n,M,prev,sigma, model)* 
        gtcorr.h.cp1(uu,iim1,ss-1,n,M,prev,sigma, model)	# recursion
    }
  }
  ans
} # end cp1	

gtcorr.h.cpt <- function(uu,ii,ss,n,M,prev,sigma,model){
		# conditional probability on right side of equation (5.1) and (5.2) of revision only involving truth
  iim1 <- ceiling(ii/(n[ss-1]/n[ss]))	# pool in stage ss-1 containing units from pool ii in stage ss
  num <- 1-gtcorr.h.prvsiteq1(iim1,ss-1,n,M,prev,sigma, model)
  den <- 1-gtcorr.h.prvsiteq1(ii,ss,n,M,prev,sigma, model)
  if (uu==0) ans1 <- num/den
  else ans1 <- 1-num/den
  ans1
}


###End Hierarchical functions###

###Matrix functions###

gtcorr.matrix <- function(r, c, m=1, p, sigma=0, se=1, sp=1, r.prime, c.prime, arrangement=c("rectangular", "diagonal", "random"), model=c('beta-binomial', 'Madsen', 'Morel-Neerchal'), ...) {
  if (any(c(p, sigma, se, sp)<0) || any(c(p, sigma, se, sp) >1)) {
    stop("p, sigma, se, and sp should be between 0 and 1")
  }
  if (any(r*c %% m !=0)) {
    stop("r*c should be divisible by m")
  }
  arrangement <- match.arg(arrangement)
  model <- match.arg(model)
  if (arrangement == "diagonal" &&
      (r != c || r != m)) {
    stop("For a diagonal arranement, r, c, and m should be equal.")
  }
  if (sum(c(length(p),
            length(sigma),
            length(se),
            length(sp))>1)>1) {
    stop("Only one of p, sigma, se, and sp can have a length > 1")
  }
  if (length(m)>1) {
    stop("m should have length 1")
  }

  
  param.grid <- expand.grid(p=p, sigma=sigma, se=se, sp=sp)
  eff <- mapply(gtcorr.m.internal,
                p=param.grid$p,
                sigma=param.grid$sigma,
                se=param.grid$se,
                sp=param.grid$sp,
                MoreArgs=list(r=r,
                              c=c,
                              m=m,
                              r.prime=r.prime,
                              c.prime=c.prime,
                              model,
                              arrangement,
                              ...)
                )
  
  result=list(r=r, c=c, m=m, r.prime=r.prime, c.prime=c.prime, param.grid=param.grid, arrangement=arrangement, model=model, efficiency=eff)
  result
}

gtcorr.m.internal <- function(p, sigma, se, sp, m, r, c, r.prime, c.prime, model, arrangement, ...) {

  #Get pr(R_i^T=0), pr(C_j^T=0) and pr(R_i^T=C_j^T=0)
  prob.neg <- get(paste("prob.neg.m.", arrangement, sep=""))
  probs <- prob.neg(p=p, sigma=sigma, r=r, c=c, m=m, r.prime=r.prime, c.prime=c.prime, model=model, ...)
  pr.rt.0 <- probs$pr.rt.0
  pr.ct.0 <- probs$pr.ct.0
  pr.rt.ct.0 <- probs$pr.rt.ct.0
  

  #The u+1th, v+1th element of pr.rt.ct is pr(R_i^T=u, C_j^T=v)
  pr.rt.ct <- matrix(NA, 2, 2)
  pr.rt.ct[1, 1] <- pr.rt.ct.0  
  pr.rt.ct[2, 1] <- pr.ct.0 - pr.rt.ct.0
  pr.rt.ct[1, 2] <- pr.rt.0 - pr.rt.ct.0
  pr.rt.ct[2, 2] <- 1 - (pr.rt.0 + pr.ct.0 - pr.rt.ct.0)
  
  pr.r.c.1 <- 0
  for (u in 0:1) {
    for (v in 0:1) {
      pr.r.c.1 <- pr.r.c.1 + se ^ (u + v) * (1 - sp) ^ (2 -u - v) * pr.rt.ct[u+1, v+1]
    }
  }
  et <- r+c+r*c*pr.r.c.1
  et/(r*c)
}

prob.neg.m.rectangular <- function(p, sigma, r, c, m, r.prime, c.prime, model, ...) {
  pr.rt.0 <- prob.neg.clust(p, sigma, c.prime, model) ^ (c / c.prime)
  pr.ct.0 <- prob.neg.clust(p, sigma, r.prime, model) ^ (r / r.prime)
  pr.rt.ct.0 <- prob.neg.clust(p, sigma, c.prime + r.prime - 1, model) *
                prob.neg.clust(p, sigma, c.prime, model) ^ (c / c.prime - 1) *
                prob.neg.clust(p, sigma, r.prime, model) ^ (r / r.prime - 1)
  list(pr.rt.0=pr.rt.0, pr.ct.0=pr.ct.0, pr.rt.ct.0=pr.rt.ct.0)
}

prob.neg.m.diagonal <- function(p, sigma, r, model, ...) {
  #For a diagonal arrangment, r=c=m so can ignore other parameters
  pr.rt.0 <- (1 - p) ^ (r)
  pr.ct.0 <- pr.rt.0
  pr.rt.ct.0 <- (1 - p) * prob.neg.clust(p, sigma, 2, model) ^ (r - 1)
  list(pr.rt.0=pr.rt.0, pr.ct.0=pr.ct.0, pr.rt.ct.0=pr.rt.ct.0)
}

prob.neg.m.random <- function(p, sigma, r, c, m, model, ...) {
  pr.rt.0 <- prob.neg.pool.rand(p=p, sigma=sigma, m=m, size=c, k=r*c/m, model=model, ...)
  if (r == c) { #If the matrix is square, pr(R_i^T=0)=pr(C_j^T=0)
    pr.ct.0 <- pr.rt.0
  } else {
    pr.ct.0 <- prob.neg.pool.rand(p=p, sigma=sigma, m=m, size=r, k=r*c/m, model=model, ...)
  }
  pr.rt.ct.0 <- prob.neg.pool.rand(p=p, sigma=sigma, m=m, size=r+c-1, k=r*c/m, model=model, ...)
  list(pr.rt.0=pr.rt.0, pr.ct.0=pr.ct.0, pr.rt.ct.0=pr.rt.ct.0)  
}

gtcorr.matrix.user <- function(clusters, p, sigma=0, se=1, sp=1, model=c('beta-binomial', 'Madsen', 'Morel-Neerchal')) {
  bad <- FALSE
  l <- max(clusters)
  if (l != as.integer(l)) bad <- TRUE
  if (l < 1) bad <- TRUE
  for (i in 1:l) {
    if (!(i %in% clusters)) bad <- TRUE
  }
  if (bad) stop("clusters should be a matrix of integers from 1 to the largest cluster number")

  if (length(p)==1) p <- rep(p, l)
  if (length(p)!=l) stop("p should have length 1 or the largest cluster number")
  if (length(sigma)==1) sigma <- rep(sigma, l)
  if (length(sigma)!=l) stop("sigma should have length 1 or the largest cluster number")
  if (any(c(p, sigma, se, sp)<0) || any(c(p, sigma, se, sp) >1)) {
    stop("p, sigma, se, and sp should be between 0 and 1")
  }

  model <- match.arg(model)
    
  r <- nrow(clusters)
  c <- ncol(clusters)
  et <- r+c
  for (ii in 1:r) {
    for (jj in 1:c) {
      ricj <- 0
      for (uu in 0:1) {
        for (vv in 0:1) {
          ricj <- ricj + se^(uu+vv) * (1-sp)^(2-uu-vv) * gtcorr.m.joint(ii,jj,uu,vv,clusters,p,sigma, model)
        }
      }
      et <- et + ricj
    }
  }
  eff <- et/(r*c)
  eff
}

gtcorr.m.joint <- function(ii,jj,uu,vv,clusters,p,sigma, model){
  pr0 <- pc0 <- prc0 <- 1
  l <- length(p)
  for (kk in 1:l){
    mik <- sum(clusters[ii,]==kk)	# number of units in row ii from cluster kk
    mjk <- sum(clusters[,jj]==kk)
    mijk <- sum(clusters[ii,]==kk) + sum(clusters[-ii,jj]==kk)
    pr0 <- pr0*prob.neg.clust(p[kk], sigma[kk], mik, model)
    pc0 <- pc0*prob.neg.clust(p[kk], sigma[kk], mjk, model)
    prc0 <- prc0*prob.neg.clust(p[kk], sigma[kk], mijk, model)
  }
  if (uu==1 & vv==1) ans <- 1-(pr0+pc0-prc0)
  if (uu==1 & vv==0) ans <- pc0-prc0
  if (uu==0 & vv==1) ans <- pr0-prc0
  if (uu==0 & vv==0) ans <- prc0
  ans
}

###End Matrix functions###

###General functions###

prob.neg.clust <- function(p, sigma, m, model) {
  #Returns the probability that a cluster of size m has no positive units
  q <- 1-p

  if (m==0) return(1)

  if (sigma==1) return(q)
    
  if (model=='beta-binomial') {
    gamma <- sigma/(1-sigma)
    f <- 1
    for (j in 0:(m-1)) {
      f <- f * #previous product
           #(1 + (q + gamma * j - 1) * (0 <= j & j <= m-1 ) ) * #second term in numerator if j between Xdot and m - 1
           #(1 + gamma * j ) ** -1 ; #denominator
           (1 + (q + gamma * j - 1) ) / #numerator
           (1 + gamma * j ) ; #denominator
    }
    return (f)
  }
  
  if (model=='Madsen') {
    return((1-sigma) * q ^ m + sigma * q)
  }
  
  if (model=='Morel-Neerchal') {
    return(p * ( q * ( 1 - sqrt(sigma))) ^ m + q * (q + p * sqrt(sigma))^m)
  }
}

prob.neg.pool.rand <- function(p, sigma, m, size, k, model, runs=1000, ...) {
  #Returns probability that a pool of size size has no positive units where
  #The k clusters of size m are arranged randomly within the pool

  #clust.idx is a vector of length m*k containing the indecies of each cluster
  #numbered 1 to k
  clust.idx <- as.factor(rep(1:k, each=m))

  #Pools is a matrix with 'runs' columns of lenght 'size'
  #Each column represents the the cluster indicies in a randomly arranged pool
  pools <- replicate(runs, sample(x=clust.idx,size=size), simplify=FALSE)
  counts <- lapply(pools, tabulate)

  #q is a vector of length m+1, where the m'+1th element is q_m', q_0=q[1]=1
  q=sapply(1:m, function(m) prob.neg.clust(p=p, m=m, sigma=sigma, model=model))
  q=c(1,q)

  #qpools is a vector with the probability that a pool tests negative for each run
  qpools <- sapply(counts, function(x) prod(q[x+1]))

  #return average
  mean(qpools)
}

###End General functions###
Any scripts or data that you put into this service are public.
gtcorr documentation built on May 2, 2019, 1:06 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
gtcorr
Calculate efficiencies of group testing algorithms with correlated responses

R/gtcorr.r
In gtcorr: Calculate efficiencies of group testing algorithms with correlated responses

Try the gtcorr package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

gtcorr Calculate efficiencies of group testing algorithms with correlated responses

R/gtcorr.r In gtcorr: Calculate efficiencies of group testing algorithms with correlated responses

Try the gtcorr package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

gtcorr
Calculate efficiencies of group testing algorithms with correlated responses

R/gtcorr.r
In gtcorr: Calculate efficiencies of group testing algorithms with correlated responses