# R/phaseI.R In osDesign: Design and analysis of observational studies

#### Documented in phaseI

```phaseI <-
function(betaTruth,
X,
N,
strata=NULL,
expandX="all",
etaTerms=NULL,
nII0=NULL,
nII1=NULL,
cohort=TRUE,
NI=NULL,
digits=NULL)
{
## Checks
##
problem <- coreChecks(betaTruth=betaTruth, X=X, N=N, etaTerms=etaTerms, expandX=expandX, betaNames=NULL)
if(problem != "")
stop(problem)
##
problem <- phaseIChecks(X=X, strata=strata, cohort=cohort, NI=NI, nII0=nII0, nII1=nII1)
if(problem != "")
stop(problem)

##
if(is.null(colnames(X)))
colnames(X) <- c("Int", paste("V", 1:(ncol(X) - 1), sep = ""))

## Phase I stratification
##  * if strata = 'NULL' return all columns
if(is.null(strata))
strata <- 2:ncol(X)
strataMat <- as.matrix(X[,strata])
colnames(strataMat) <- colnames(X)[strata]

## Restrict design matrix to columns indicated in 'etaTerms'
##
if(!is.null(etaTerms))
X <- X[, is.element(colnames(X), etaTerms)]

## Expanded design matrix
##
Xexp <- X
if(expandX[1] != "none")
Xexp <- as.data.frame(expandCatX(X, expandX=expandX))

## Outcome probabilities
##
piY <- expit(as.vector(as.matrix(Xexp) %*% betaTruth))

## Calculate the expected number of Cases/nonCases for each row of X
##  * adjustment if the phase I sample is a case-control sample (of size NI=c(N0,N1))
##
N0k <- N * (1-piY)
N1k <- N * piY
if(cohort == FALSE)
{
N0k <- N0k * NI[1]/sum(N0k)
N1k <- N1k * NI[2]/sum(N1k)
}

## Phase I counts for the given stratification
##  * if strata = 'NULL' just return the population
##
## Need to reverse the ordering b/c of the way aggregate totals across the variables within 'strata'
##
aggCall <- paste("aggregate(N0k, list(", paste("strataMat[,",length(strata):1,"]", sep="", collapse=", "), "), FUN=sum)", sep="")
phaseI  <- eval(parse(text=aggCall))
names(phaseI) <- c(colnames(strataMat)[length(strata):1], "N0k")
aggCall <- paste("aggregate(N1k, list(", paste("strataMat[,",length(strata):1,"]", sep="", collapse=", "), "), FUN=sum)", sep="")
phaseI\$N1k <- eval(parse(text=aggCall))\$x
##
phaseI[,1:length(strata)] <- phaseI[,length(strata):1]  ## Reverse back again; note the colnames don't reverse so it done in the next loop
##
for(i in 1:nrow(phaseI))
rownames(phaseI)[i] <- paste(colnames(phaseI)[length(strata):1], "=", phaseI[i, 1:length(strata)], " ", collapse = " ")
phaseI <- round(phaseI[,-c(1:length(strata))])
##
cat("Expected Phase I counts:\n")
print(phaseI)

##
if(!is.null(nII0) && !is.null(nII1))
{
##
temp <- apply(strataMat, 2, unique)
if(is.matrix(temp))
K <- prod(apply(temp, 2, FUN=length))
if(is.list(temp))
K <- prod(unlist(lapply(temp, FUN=length)))
##
if(length(nII0) != K | length(nII1) != K)
stop("* invalid dimension of 'nII0' or 'nII1'")
##
if((sum(phaseI[,1] < nII0) > 0) | (sum(phaseI[,2] < nII1) > 0))
cat("\nWarning: At least one Phase I stratum does not have sufficiently many non-cases or cases\n\n")

## Phase II counts
##
phaseII <- phaseI
phaseII[,1] <- pmin(phaseI[,1], nII0)
phaseII[,2] <- pmin(phaseI[,2], nII1)
colnames(phaseII) <- c("nII0k", "nII1k")

## Sampling probabilities
##
phaseIIprob <- phaseII/phaseI
colnames(phaseIIprob) <- c("p0k", "p1k")
if(is.null(digits))
digits <- max(-log10(phaseIIprob)) + 2
phaseIIprob <- round(phaseIIprob, digits=digits)

##
cat("\nPhase II sample size:\n")
print(phaseII)
cat("\nExpected Phase II sampling probabilities:\n")
print(phaseIIprob)
}

##
invisible()
}
```

## Try the osDesign package in your browser

Any scripts or data that you put into this service are public.

osDesign documentation built on May 29, 2017, 8:45 p.m.