R/gen.exp.R In Epi: Statistical Analysis in Epidemiology

Documented in gen.exp

# Acronyms:
# dop: date of purchase
# dpt: dose per time
# amt: amount
# dur: duration
# dof: date of follow-up

######################################################################
# A function to compute cumulative doses etc. at the purchase dates
# *and* derive intervals of drug coverage resp no coverage from the
# purchase date, amounts and dosage per time
use.amt.dpt <-
function( purchase, # data frame with dop, amt and dpt
push.max,
rm.dose )
{
do.call( "rbind",
lapply( split( purchase, purchase\$id ),
function(set)
{
np <- nrow(set)
if( np==1 ) # return data frame of 2 rows
{
dfR <- data.frame( id = set\$id,
dof = set\$dop + c(0,set\$amt/set\$dpt),
amt = c(set\$amt,0),
dpt = c(set\$dpt,0),
off = c(FALSE,TRUE),
cum.amt = c(0,set\$amt),
cum.tim = c(0,set\$amt/set\$dpt) )
return( dfR )
}
set <- set[order(set\$dop),]
# Compute length of exposure periods
drug.dur  <- set\$amt / set\$dpt
# Put the exposed period head to foot
new.start <- min( set\$dop ) + c(0,cumsum(drug.dur[-np]))
# Move them out so that the start of a period is never earlier than
# the dop
exp.start <- new.start + cummax( pmax(set\$dop-new.start,0) )
# Compute the pushes
push.one  <- exp.start - set\$dop
# Revise them to the maximally acceptable
push.adj  <- pmin( push.one, push.max )
# Revise the starting dates of exposure
exp.start <- exp.start - push.one + push.adj
# Revise the durations to be at most equal to differences between the
# revised starting dates
drug.dur  <- pmin( drug.dur, c(diff(exp.start),Inf) )
# Compute the end of the intervals
exp.end   <- exp.start + drug.dur
# Find out which exposure intervals that are followed by a gap:
followed.by.gap <- c( exp.start[-1]-exp.end[-np] > 0, TRUE )
# To facilitate further calculations we add records for the gaps
# and also a record for the date of drug expiration
dfR <- rbind( data.frame( id = set\$id,
dof = exp.start,
amt = set\$amt,
dpt = set\$dpt ),
data.frame( id = set\$id,
dof = exp.end[followed.by.gap],
amt = 0,
dpt = 0 ) )
dfR <- dfR[order(dfR\$dof),]
# Logical indicator of intervals off drug
dfR\$off <- (dfR\$dpt==0) | (dfR\$dof < min(set\$dop))
# Finally compute the cumulative dose and time on drug at the end of
# the interval using interval length and dpt:
if( rm.dose ) {
dfR\$cum.amt <- with( dfR, cumsum( c(0, diff(dof)* dpt[-length(dpt)]) ) )
} else dfR\$cum.amt <- cumsum( c( 0, dfR\$amt[-nrow(dfR)] ) )
dfR\$cum.tim <- with( dfR, cumsum( c(0, diff(dof)*!off[-length(dpt)]) ) )
return( dfR )
} ) )
}

######################################################################
# Compute the cumulative dose at all purcase dates and at the last
# (unknown) future expiry date, computed based on previous
# consumption.  The resulting data frame has one more line per person
# than no. of purchases.
use.only.amt <-
function( purchase,
pred.win )
{
do.call( "rbind",
lapply( split( purchase, purchase\$id ),
function(set)
{
np <- nrow(set)
if( np==1 ) return( NULL )
set <- set[order(set\$dop),]
# The points to include in the calculation:
# All dates after pred.win before last purchase,
# but at least the last two purchase dates,
wp <- ( set\$dop > pmin( max(set\$dop)-pred.win,
sort(set\$dop,decreasing=TRUE) ) )
# Cumulative amount consumed at each dop since first
cum.amt <- cumsum(c(0,set\$amt))
# Average slope to use to project the duration of last purchase
avg.slp <- diff(range(cum.amt[c(wp,FALSE)]))/
diff(range(set\$dop[wp]))
# Purchase dates and the date of last consumption
dof <- c( set\$dop, set\$dop[np]+set\$amt[np]/avg.slp )
# Cumulative time since first
cum.tim <- dof - set\$dop
# The only time without drug is after last dispense expired
off <- rep( c(FALSE,TRUE), c(length(dof)-1,1) )
return( data.frame( id = set\$id,
dof = dof,
cum.amt = cum.amt,
cum.tim = cum.tim,
off = off ) ) # will be used in the subsequent code
} ) )
}

######################################################################
# Function to interpolate the drug exposures to the dates of FU.
dex.int <-
function( set, breaks, lags, lag.dec )
{
dof <- NULL
# If only one record return null
if( nrow(set) < 2 ) return( NULL )
# All values of these are identical within each set (=person)
doe <- set\$doe
dox <- set\$dox
# The first date of drug exposure according to the assumption
doi <- min(set\$dof)
# Breakpoints and the entry end exit dates are the dates we care for
# - but only within breaks
doin <- max( min(breaks), doe )
doex <- min( max(breaks), dox )
breaks <- sort( unique( c(breaks,doin,doex) ) )
# only the breaks inside the follow-up interval
xval <- breaks[breaks>=doin & breaks<=doex]
if( length(xval) == 0 ) return( NULL )
# Merge a) the purchase dates (set\$dof) and
#    indicator being off drug (set\$off) and
# the date a person goes off drug, doff (set to NA for remaining records)
# with b) the break dates (which is where we want things computed)
# Note we merge on the variable dof and have the data frame with
# dof=xval as the >first< so that values of xval will be in the
# resulting dfr\$dof even if dof-values in set are almost equal til
# an xval value. Ensures that sum(dfr\$dof %in% xval)==length(xval)
dfr <-  merge( data.frame( id = set\$id,
dof = xval ),
data.frame( set[,c("id","dof","off")],
doff = ifelse(set\$off,set\$dof,NA) ),
all=TRUE )
# carry the off drug indicator forward to all break dates
dfr\$off <- zoo::na.locf( dfr\$off, na.rm=FALSE )
# possible NAs are before any drug purchase hence off-drug
dfr\$off <- ifelse( is.na(dfr\$off), TRUE, dfr\$off )
# carry date of going off drug forward, but only for the off period
dfr\$doff <- zoo::na.locf( dfr\$doff, na.rm=FALSE ) * ifelse(dfr\$off,1,NA)
# time from cessation can now be computed
dfr\$tfc <- dfr\$dof - dfr\$doff
# time from initiation of drug
dfr\$tfi  <- pmax( 0, dfr\$dof-doi )
# restrict to the desired timepoints
dfr <- subset( dfr, dof %in% xval )
dfr <- dfr[!duplicated(dfr\$dof),]
# linear interpolation of the cumulative dose and time from the
# purchase data (set)
dfr\$cdos <- approx( set\$dof, set\$cum.amt, xout=xval, rule=2 )\$y
dfr\$ctim <- approx( set\$dof, set\$cum.tim, xout=xval, rule=2 )\$y
# the same for the desired lags
for( lg in lags )
dfr[,paste( "lag.pre",
formatC(lg,format="f",digits=lag.dec),
sep="" )] <-
approx( set\$dof, set\$cum.amt, xout=xval-lg, rule=2 )\$y
dfr\$doff <- zoo::na.locf( dfr\$doff, na.rm=FALSE )
dfr\$tfc[is.na(dfr\$tfc)] <- 0
dfr\$dur <- c( diff(dfr\$dof), NA )
return( dfr[-nrow(dfr),] )
}

######################################################################
# The function that ties it all together
gen.exp <-
function( purchase,  id="id" , dop="dop", amt="amt", dpt="dpt",
fu, doe="doe", dox="dox",
breaks,
use.dpt = ( dpt %in% names(purchase) ),
push.max = Inf,
rm.dose = FALSE,
lags = NULL,
lag.dec = 1,
lag.pre = "lag.",
pred.win = Inf )
{
# to aviod a NOTE from R CMD check
dof <- NULL
# Make sure that the data frames have the right column names
wh <- match( c(id,dop,amt), names(purchase) )
if( any( is.na(wh) ) ) stop("Wrong column names for the purchase data frame")
names( purchase )[wh] <- c("id","dop","amt")

# Allow dpt to be entered as numerical scalar common for all records
if( use.dpt )
if( is.numeric(dpt) )
{
if( length(dpt) > 1 ) stop("If dpt is numeric it must have length 1\n",
"otherwise it must be the name of the dpt variable in the dataset")
purchase\$dpt <- dpt
}
else names( purchase )[match(dpt,names(purchase))] <- "dpt"

wh <- match( c(id,doe,dox), names(fu) )
if( any( is.na(wh) ) ) stop("Wrong column names for the follow-up data frame")
names( fu )[wh] <- c("id","doe","dox")

if( use.dpt ) { tmp.dfr <- use.amt.dpt( purchase,
push.max = push.max,
rm.dose = rm.dose )
} else { tmp.dfr <- use.only.amt( purchase,
pred.win = pred.win ) }

# Having done what needs to be done about the purchase records we turn
# to the follow-up records, but we first need to accommodate the fact
# that there might be more fu records per person:

# Generate record no within id in fu (ordering in fu is immaterial)
fu\$no <- ave( fu\$id, fu\$id, FUN=function(x) 1:length(x) )
# Set up the object to collect the resulting follow-up
res <- NULL

# loop through subsets of fu with different ids
for( ni in 1:max(fu\$no) )
{
# Merge the follow-up period for the persons to the correponding
# exposure records (hence the all.y=T)
tm.dfr <- merge( tmp.dfr, fu[fu\$no==ni,], by="id", all.y=TRUE )

# Interpolate to find the cumulative doses at the dates in the vector breaks
res.dfr <-
do.call( "rbind",
lapply( split( tm.dfr, tm.dfr\$id ),
dex.int,
breaks, lags, lag.dec ) ) # end of do.call
res <- rbind( res, res.dfr ) # append to result from other fu\$no
} # end of for( ni in 1:max(fu\$no) )

var.order <- c("id","dof","dur","off","doff","tfc","tfi","ctim","cdos",
names(res)[grep("lag.prefix",names(res))] )
res <- res[order(res\$id,res\$dof),var.order]
names(res) <- gsub( "lag.prefix", lag.pre, names(res) )
res
}
# end of gen.ex

Try the Epi package in your browser

Any scripts or data that you put into this service are public.

Epi documentation built on Feb. 28, 2021, 1:06 a.m.