R/screentreat_library.R

#############################################################################
# OLD screentreat_library.R file
#############################################################################

# The plan for this file is to delete functions as they get migrated


############################################################
## sim_clinical_incidence
############################################################

#' Use sim_KM to efficiently simulate ages at clinical incidence for nsim populations
#' 
#' Given a population with a clinical incidence survival curve and ages, returns random draws of their ages at incidence
#' @param popdata Data frame where individuals are rows, with columns birth_year, age, and male, OR a list of data frames that allow build_simpop() to construct this
#' @param bootrows Matrix or data frame of row indicators that can be applied to the data to recover different bootstraps of the data. Each column is a different bootstrap of thedata. OR, list of data frames with these row indicators for use with build_simpop()
#' @param incidence Matrix/df with columns 'age' and 'cumsurv'
#' @param results_as_matrix Convert the results from a data frame to a matrix?
#'
#' @return Data frame or matrix of ages at clinical incidence, capped at age 121. Age at incidence will always be after age at entry.
#' 
#' @examples
#' # Prepare incidence data
#' library(bcimodel)
#' data(incratesf)
#' inc <- interpolate_cumsurv(incratesf, 
#'                           ratevar='Female.Rate.Per.100K',
#'                           country='Uganda',
#'                           maxage=100)
#' # Example 1: use the list approach
#' # Simple age distribution
#' ages <- data.frame(age=c(20,30,40), prop=c(0.2, 0.5, 0.3))
#' # Simulate a population of size 10 twice, using these proportions and the row IDs
#' sim.age.rows <- sim_multinom(nsims=10, 2, ages$prop, names=1:nrow(ages))
#' # Now specify they're all female
#' sex <- data.frame(male=0, prop=1)
#' sim.sex.rows <- matrix(1, nrow=10, ncol=2)
#' # Create the lists
#' poplist <- list(sex, ages)
#' rowlist <- list(sim.sex.rows, sim.age.rows)
#' # Get ages at clinical incidence
#' incage <- sim_clinical_incidence(poplist, rowlist, inc)
#' 
#' # Example 2
#' # Alternatively, pass a population in data frame form and just use its normal rows
#' # Use build_simpop and the 2nd simulation's row IDs
#' pop <- build_simpop(poplist, rowlist, sim=2)[,c('male', 'age')]
#' poprows <- replicate(2, 1:nrow(pop))
#' incage <- sim_clinical_incidence(pop, poprows, inc)
#' 
#' @export
#' @importFrom plyr ddply
#' @importFrom plyr .

sim_clinical_incidence = function(popdata, bootrows, 
                                  incidence, results_as_matrix=FALSE) { 
    # Determine number of simulations
    nsim = ncol(bootrows)
    if (is.null(nsim)) nsim = ncol(bootrows[[1]])

    # Estimate for each simulation separately
    newpop = sapply(1:nsim,
        function(y) {
            # Construct the dataset
            if (!is.data.frame(popdata)) {
                thisboot = subset(build_simpop(datalist=popdata,
                                               rowlist=bootrows,
                                               sim=y),
                                  select=c(age))
            } else {
                thisboot = popdata[bootrows[, y], 
                                   c("age",
                                     "male")]
            }
            thisboot$tempid = 1:nrow(thisboot)
            
            # Simulate ages at incidence
            inc = ddply(thisboot,
                        .(age),
                        function(x) {
                            # Extract age
                            Age = min(x$age)

                            # Nsims
                            if (is.data.frame(x)) nrow=nrow(x) else nrow=1

                            # Max draw based on current age
                            maxu <- incidence[incidence$age==Age,
                                              'cumsurv']
                            ageinc <- with(incidence, 
                                           sim_KM(cumsurv, 
                                                  age, 
                                                  smalltimes=Age, 
                                                  bigtimes=121, 
                                                  nsims=nrow,
                                                  maxdraw=maxu))
                            if (sum(is.na(ageinc))!=0) browser()
                            ageinc <- data.frame(x, ageinc=matrix(ageinc, 
                                                                  nrow=nrow,
                                                                  ncol=1))
                            return(ageinc)
                        })
            
            # Return to original sort order, and return 
            # ageOC
            inc = inc[order(inc$tempid), ]
            return(inc$ageinc)
        })
    
    # Return results as a matrix?
    if (results_as_matrix) {
        colnames(newpop) = 1:nsim
        newpop = as.matrix(newpop)
    }

    return(newpop)

### A data frame or matrix with nsim columns of randomly
### drawn ages at other-cause death for individuals (rows)
}


############################################################
## return_value_from_id
############################################################

return_value_from_id = function# Use an ID no to look up a value

##description<< Using a matrix of IDs, look up a value in a dataframe

(ids,
    ### Matrix of ids
df,
    ### Data frame containing the values of interest, sorted
    ### by ID (i.e. row #s == ID #s)
value
    ### Column name of value to return from the df
) {

    return(apply(ids, 2, function(x) df[,value][x]))
    
}

############################################################
## sapply_withnames
############################################################

sapply_withnames = function# Use sapply a certain way

##description<< sapply with USE.NAMES=TRUE and simplify=FALSE

(list_object,
    ### List to sapply
funX,
    ### Function to pass to FUN=...
...
    ### Other arguments to pass to sapply
) {

    sapply(list_object,
           FUN=as.function(funX),
           ...,
           USE.NAMES=TRUE,
           simplify=FALSE)
    
}


############################################################
## summarize_over_sims
############################################################

summarize_over_sims = function# Return the mean and 95% interval over sims

##description<< Return the mean and 95% interval over sims. Sims are 
##represented as columns

(data,
    ### Matrix, with columns (default) or rows as different sims
 funX,
    ### Function to apply to columns, e.g. 'mean' or 'sum'
 ...,
    ### Optional arguments to mean and quantile, e.g. 'na.rm=TRUE'
 applyto=2,
    ### 2=columns, 1=rows
 onecell=FALSE,
    ### Old format - UI in separate column
 numdec=4
    ### Rounding
) {
    
    fixdec <- function(x, k) format(round(x, k), nsmall=k)
  
    estimates <- apply(data, applyto, funX)
  
    if (onecell)  {
      ci <- apply(data, applyto, quantile, probs=c(0.025, 0.975))
      returnvector <- paste0(fixdec(estimates, numdec), 
                             ' (',
                             paste(fixdec(ci[1,],numdec),
                                    fixdec(ci[2,], numdec),
                                   sep=','),
                             ')')
      names(returnvector) <- names(estimates)
      return(returnvector)
    } else {
      return(data.frame(Estimate=round(mean(estimates, ...),numdec),
                        UI=paste0('(',
                                  paste(round(quantile(estimates, 
                                                       probs=c(0.025, 0.975),
                                                       ...),numdec),
                                        collapse=','),
                                  ')')))
      
    }
}

############################################################
## tally_years_simple
############################################################

tally_years_simple = function# Return years of life lived
 
##description<< For specified follow-up times, return years lived
##patterned after tally_cuminc_simple
 
(followup,
    ### Vector of follow-up times
 etimes,
    ### List of matrices of times to event
 per=10000
    ### Denominator for cumulative incidence
) {
    years <- lapply(followup,
                     function(x) {
                       sapply(names(etimes), function(y) {
                         timesmat <- matrix(x,ncol=ncol(etimes[[y]]),
                                            nrow=nrow(etimes[[y]]))
                         cd_by_x <- pmin(timesmat, etimes[[y]])
                         (colSums(cd_by_x)/nrow(cd_by_x))*per
                       })
                     })  
    
    names(years) <- followup
    return(years)
}
 
############################################################
## tally_cuminc_simple
############################################################

tally_cuminc_simple = function# Return cumulative incidence
 
##description<< For specified follow-up times, return cumulative
##incidence of event across all trials and arms and sims
 
(followup,
    ### Vector of follow-up times
 etimes,
    ### List of matrices of times to event
 event,
    ### List of matrices of event indicators (1==yes, 0=no)
 per=10000
    ### Denominator for cumulative incidence
) {
    cuminc <- lapply(followup,
                     function(x) {
                       sapply(names(etimes), function(y) {
                         cd_by_x <- etimes[[y]]<x & event[[y]]==1
                         (colSums(cd_by_x)/nrow(cd_by_x))*per
                       })
                     })  
    
    names(cuminc) <- followup
    return(cuminc)
}
 
############################################################
## tally_cuminc
############################################################

tally_cuminc = function# Return cumulative incidence

##description<< For specified follow-up times, return cumulative
##incidence of an event

(followup,
    ### Vector of follow-up times
 etimes,
    ### Matrix of times to event
 event,
    ### Matrix of event indicator (1==yes, 0=no)
 etimes2=NULL,
    ### Optional matrix of times to event. Will be considered
    ### a screening arm compared to etimes as the control arm
 event2=NULL,
    ### Optional matrix of event indicator for 
    ### etimes2
 numdec=4 
    ### Decimals
) {

    cuminc <- lapply(followup,
               function(x) {
                   cd_by_x <- etimes<x & event==1
                   summarize_over_sims(cd_by_x, 'sum')
               })

    if (is.null(etimes2)) {
        return(data.frame(`Follow-Up Year`=followup,
                          do.call('rbind', cuminc)))

    } else {

        cuminc2 <- lapply(followup,
                   function(x) {
                       cd_by_x <- etimes<x & event==1
                       cd_by_x2 <- etimes2<x & event2==1

                       # Compute MRR, ARR and NNS for each sim
                       mrr <- matrix(colSums(cd_by_x2)/colSums(cd_by_x),
                                      nrow=1)
                       arr <- matrix(colMeans(cd_by_x)-colMeans(cd_by_x2),
                                      nrow=1)
                       nns <- 1/arr
                       if (sum(is.nan(mrr))>(ncol(etimes)/2))
                           warning('Warning: more than half of sims have no deaths in the control group')

                       # Now summarize over sims
                       toreturn <- data.frame(
                            `Follow-Up Year`=x,
                            Measure=c('Cumulative Incidence, Group 0',
                                      'Cumulative Incidence, Group 1',
                                      'MRR',
                                      'ARR',
                                      'NNS'),
                            rbind(
                               summarize_over_sims(cd_by_x, 'mean'),
                               summarize_over_sims(cd_by_x2, 'mean'),
                               summarize_over_sims(mrr, 'mean', na.rm=TRUE),
                               summarize_over_sims(arr, 'mean', na.rm=TRUE),
                               summarize_over_sims(nns[, is.finite(nns), 
                                                   drop=FALSE], 
                                                    'mean', na.rm=TRUE)
                            ))

                       # Replace summary means for MRR, ARR and NNS with
                       # means derived from the mean deaths in control
                       # and screening arms

                           # 1/23/15 edit: since we may not want to do this,
                           # I'm creating duplicate columns to save the original
                           # Estimates
                           toreturn = transform(toreturn, Estimate_Orig=Estimate)

                       screenrow <- which(toreturn$Measure==
                                          'Cumulative Incidence, Group 1')
                       controlrow <- which(toreturn$Measure==
                                           'Cumulative Incidence, Group 0')
                       toreturn[which(toreturn$Measure=='MRR'), 'Estimate'] <- 
                           round(
                               toreturn[screenrow, 'Estimate']/
                               toreturn[controlrow, 'Estimate'],
                           numdec)
                       toreturn[which(toreturn$Measure=='ARR'), 'Estimate'] <- 
                           round(
                               toreturn[controlrow, 'Estimate']-
                               toreturn[screenrow, 'Estimate'],
                           numdec)
                       toreturn[which(toreturn$Measure=='NNS'), 'Estimate'] <- 
                           round(
                                 1/toreturn[which(toreturn$Measure=='ARR'),
                                            'Estimate'],
                           numdec)
                    return(toreturn)
                   })
        return(do.call('rbind', cuminc2))
    }
    
}

############################################################
## age_specific_mortrate
############################################################

age_specific_eventrate = function# Return age-specific cancer mortality rates

##description<< Given age at entry and times to cancer and all-cause death,
##calculate age-specific cancer mortality rates

(ageentry,
    ### Matrix of ages at entry
 timealive,
    ### Matrix of time to all-death from age at entry
 ageEvent,
    ### Matrix of age at event
 equals=TRUE,
    ### Must ageEvent==ageentry+timealive to be 
    ### a qualifying event? This is TRUE when
    ### we are evaluating cancer-specific mortality
    ### and this properly identifies those who 
    ### died of cancer
 denominator=100000,
    ### Denominator for population rates
 prevalence=FALSE
    ### Return prevalence percent instead of rate
) {

    # Determine 5-year age-groups in the population
    agesatdeath <- ageentry + timealive
    age_groups <- sort(unique(floor(c(round(agesatdeath/5,0))*5)))
    age_groups <- c(age_groups, max(age_groups)+5)

    # For each age group, determine person-years lived in each sim
    rates <- sapply(as.character(age_groups),
                           function(a, agesatdeath, ageEvent,
                                    prev=prevalence, interval=5,
                                    n=nrow(ageEvent), p=ncol(ageEvent)) { 
                              a <- as.numeric(a)
                              upper <- a+interval

                              # Determine the subset of eligible individuals
                              # For cause-specific mort rates: 
                              # ageEvent==agesatdeath for cancer deaths
                              # For other rates:
                              # ageEvent must happen before death
                              if (equals) 
                                  subset <- (ageEvent==agesatdeath)
                              else 
                                  subset <- (ageEvent<=agesatdeath)
                              # Start out with inInterval=0
                              inInterval <- matrix(0, nrow=n, ncol=p)
                              # Evaluate interval events
                              happened <- ageEvent<upper 
                              inInterval[subset & ageEvent>=a & happened] <- 1
                              # check: 
                              # summary(c(agesatdeath[as.logical(inInterval)]))

                              # Now determine years alive
                              yrsalive <- agesatdeath-a
                              yrsalive[yrsalive<0] <- 0
                              if (prev) alive <- (yrsalive>0)
                              yrsalive[yrsalive>interval] <- interval

                              # Compute rates 
                              if (!prev) 
                                  rates <- colSums(inInterval)/colSums(yrsalive) 
                              else 
                                  rates <- colSums(subset & happened & alive)/colSums(alive)
                             
                              return(rates)
                                        
                           }, agesatdeath, ageEvent, prevalence,
                           USE.NAMES=TRUE)

    if (prevalence) denominator <- 100
    rates <- data.frame(`Age Group`=age_groups,
                        Rate=colMeans(rates)*denominator)

    return(rates)
}

############################################################
## mean_matrix_subgroup
############################################################

mean_matrix_subgroup = function# Return the mean of a numeric matrix

##description<< Optional subgroup indicator

(mmatrix,
    ### Matrix, with columns as different sims
 subset=NULL
    ### Optional logical matrix of same dimensions indicating
    ### a subset over which to compute the mean
) {

    if (is.null(subset)) subset <- matrix(TRUE, nrow=nrow(mmatrix),
                                          ncol=ncol(mmatrix))
    return(mean(mmatrix[subset]))
}
cancerpolicy/bcimodel documentation built on June 30, 2019, 12:39 a.m.