require(hreport)  # add to above: results='hide'
knitrSet(lang='markdown', fig.path='figure/')
options(prType='html')
mu   <- markupSpecs$html   # in Hmisc - HTML markups
frac <- mu$frac
mu$styles()              # define HTML styles, functions
## Generate test data
set.seed(1)
n <- 500
d <- data.frame(country=sample(c('US', 'Canada', 'Spain', 'France',
                  'Germany'), n, TRUE),
                site=sample(1:10, n, TRUE))
d$site   <- paste(substring(d$country, 1, 2), d$site, sep='')
d$region <- factor(ifelse(d$country %in% c('US', 'Canada'),
                          'North America', 'Europe'))

d <- upData(d, edate = as.Date('2005-01-01') +
            round(rgamma(n, 2, .01)) - 600 * (country == 'US'),
            rdate = edate + round(runif(n, 1, 30)), print=FALSE)
d$rdate[runif(nrow(d)) < 0.5] <- NA  # non-randomized subjects )

# with(d, table(region, country))

# For US manually compute # randomized per month
us   <- subset(d, country == 'US')
site <- us$site
ed   <- us$edate
rd   <- us$rdate
months <- difftime(as.Date('2007-12-31'), ed, units='days') /
  (365.25 / 12)
m <- max(months)
a <- sum(!is.na(rd)) / as.numeric(m)   # .8545774 (agrees with chart)
# Compute maximum months elapsed for each site then sum over sites
maxpersite <- tapply(months, site, max)
b <- sum(!is.na(rd)) / sum(maxpersite)
## 0.0864429 = 47 / 543.6715 chart: .08645 (rounded)

## Suppose there are more subjects enrolled and randomized than really
## made their way into the dataset
denom <- c(enrolled=nrow(d) * 1.1,
           randomized=sum(!is.na(d$rdate)) + 10)

sethreportOption(tx.var='treat', denom=denom)
## Initialize file to hold appendix information such as subject IDs
## so all later writing to this file can use append=TRUE

Introduction

Interactive Graphs

Most of the graphs produced here are semi-interactive. One can hover over elements of graphs with the mouse to have detailed information pop up.

Figure Captions

Needles represent the fraction of observations used in the current analysis. The first needle (red) shows the fraction of enrolled patients used. If randomization was taken into account, a second needle (green) represents the fraction of randomized subjects included in the analysis. When the analyses consider treatment assignment, two more needles may be added to the display, showing, respectively, the fraction of subjects randomized to treatment A used in the analysis and the fraction of subjects on treatment B who were analyzed. The colors of these last two needles are the colors used for the two treatments throughout the report. The following table shows some examples. dNeedle uses colors in sethreportOption(tx.col=, er.col=).

# Store using short variable names so Rmarkdown table column
# width will not be wider than actually needed
d1 <- dNeedle(1)
d2 <- dNeedle((3:4)/4)
d3 <- dNeedle((1:2)/4)
d4 <- dNeedle(c(1,2,3,1)/4)

|Signpost | Interpretation | |------- | -------------------------------------------------| | r d1 | All enrolled subjects analyzed, randomization not considered| | r d2 | Analysis uses r frac(3,4) of enrolled subjects, and all randomized subjects| | r d3 | Analysis uses r frac(1,4) of enrolled subjects, and r frac(1,2) of randomized subjects| | r d4 | Same as previous example, and in addition the analysis utilized treatment assignment, analyzing r frac(3,4) of those randomized to A and r frac(1,4) of those randomized to B|

Survival Curves

Graphs containing pairs of Kaplan-Meier survival curves show a shaded region centered at the midpoint of the two survival estimates and having a height equal to the half-width of the approximate 0.95 pointwise confidence interval for the difference of the two survival probabilities. Time points at which the two survival estimates do not touch the shaded region denote approximately significantly different survival estimates, without any multiplicity correction.

Accrual

accrualReport(enroll(edate) + randomize(rdate) ~
              region(region) + country(country) + site(site),
              data=d,
              dateRange=c('2005-01-01', '2007-12-31'),
              targetN=
                data.frame(edate=c(500, 1000), rdate=c(250, 500)),
              targetDate=c('2006-01-01', '2007-12-31'),
              closeDate='2007-12-31')

Exclusions

f <- 0.05
d <- upData(d,
            subjid = 1 : n,
            pend   = rbinom(n, 1, .1),
            e1     = rbinom(n, 1, f),
            e2     = rbinom(n, 1, f),
            e3     = rbinom(n, 1, f),
            e4     = ifelse(runif(n) < 0.25, NA, rbinom(n, 1, .10)),
            tested = rbinom(n, 1, .75),
            e5     = ifelse(tested, rbinom(n, 1, .04), NA),
            e6     = rbinom(n, 1, f),
            e7     = rbinom(n, 1, f),
            rndz   = rbinom(n, 1, .75),
            labels=c(e1='Prior MI', e2='History of Asthma',
              e3='History of Upper GI Bleeding',
              e4='No Significant CAD', e5='Inadequate Renal Function',
              e6='Pneumonia within 6 weeks', e7='Prior cardiac surgery'),
            print=FALSE)

erd <- data.frame(subjid = 1 : 50,
                  loc   = sample(c('gastric', 'lung', 'trachea'), 50, TRUE))

# To check warning messages, greportOption denom does not match pend, e1-e7
options(dumpfile='/tmp/z')    # for seeing frequencies of all combinations
exReport(~ pending(pend) + e1 + e2 + e3 + e4 + e5 + e6 + e7 +
         randomized(rndz) + id(subjid) + cond(e5, 'Tested', tested),
         erdata = erd,
         whenapp= c(e4='CCTA done'), data=d) #, hc=3.75, h=4)

# Show exclusions in original variable order
if(FALSE) exReport(~ pending(pend) + e1 + e2 + e3 + e4 + e5 + e6 + e7 +
         randomized(rndz) + id(subjid) + cond(e5, 'Tested', tested),
         erdata=erd,
         whenapp=c(e4='CCTA done'), data=d, #hc=3.75, h=4,
         sort=FALSE, app=FALSE)

Baseline Variables

n <- 100
f <- function(na=FALSE) {
  x <- sample(c('N', 'Y'), n, TRUE)
  if(na) x[runif(100) < .1] <- NA
  x
}
set.seed(1)
outs <- c('home', 'hospitalized', 'organ support', 'dead')
d <- data.frame(x1=f(), x2=f(), x3=f(), x4=f(), x5=f(), x6=f(),
                x7=f(TRUE),
                age=rnorm(n, 50, 10),
                sbp=rnorm(n, 120, 7),
                dbp=rnorm(n,  80, 6),
                days=sample(1:n, n, TRUE),
                race=sample(c('Asian', 'Black/AA', 'White'), n, TRUE),
                sex=sample(c('Female', 'Male'), n, TRUE),
                treat=sample(c('A', 'B'), n, TRUE),
                region=sample(c('North America','Europe'), n, TRUE),
                meda=sample(0:1, n, TRUE),
                medb=sample(0:1, n, TRUE),
                                outcome=factor(sample(1:4, n, TRUE), 1:4, outs),
                subjid=1:n)
d$days[1] <- NA
d$race[1:10] <- NA
d <- upData(d, labels=c(x1='MI', x2='Stroke', x3='AKI', x4='Migraines',
                 x5='Pregnant', x6='Other event', x7='MD withdrawal',
                 race='Race', sex='Sex', treat='treatment',
                 sbp='Systolic BP', days='Time Since Randomization',
                 meda='Medication A', medb='Medication B',
                                 outcome='Ordinal Outcome'),
            units=c(sbp='mmHg', dbp='mmHg', age='years', days='days'),
            print=FALSE)
dasna <- subset(d, region=='North America')
# with(dasna, table(race, treat))
den <- c(enrolled=n + 50, randomized=n, table(d$treat))
sethreportOption(denom=den, tx.var='treat')

dReport(race + sex +
        ynbind(x1, x2, x3, x4, x5, x6, x7, label='Exclusions') ~ 1,
        head='Overall frequencies of categorical demographic variables and exclusions',
        data=d, w=4, h=4.5)

dReport(race + sex ~ treat,
        groups='treat',
        head='Categorical demographic variables',
        data=d, w=4, h=4.5)


dReport(race + sex ~ region, data=addMarginal(d, region),
        w=4.75, h=3.75,
        head='Demographics')

dReport(race + sex ~ region + treat, data=addMarginal(d, region),
        groups='treat',
        w=4.75, h=3.75,
        head='Demographics')

## Add a new block of variables that apply only to males
dReport(race + sex +
        pBlock(race, subset=sex=='Male', label='Race: Males') ~ region,
        data=d, groups='region',
        head='Demographics with race for males')

# Show raw data and smoothed relationship between age and sbp, stratified.
dReport(sbp ~ age + treat, groups='treat', data=d,
        popts=list(fitter=loess, showpts=TRUE), what='xy')

# Show ECDFs of sbp and age stratified by treatment+reference lines at quartiles
dReport(sbp + age ~ treat, groups='treat', what='ecdf', data=d,
        sopts=list(q=(1:3)/4, ncols=2, width=750))

# Show means and confidence intervals by treatment as dot chart
f <- function(x) {
  x <- x[! is.na(x)]
  c(smean.cl.normal(x, na.rm=FALSE), n=length(x))
}

dReport(sbp ~ treat, data=d,
        fun = f, head='Mean and confidence limits')
#        popts = list(textplot='Mean', digits=1),

Ordinal Outcome Summary

dReport(outcome ~ treat + sex, groups='treat', what='stacked', data=d, w=800, h=400)

Medication Usage Over Time

dReport(meda + medb ~ days + treat, what='xy',
        groups='treat', data=d,
        popts=list(fitter=loess, xlim=c(0, 130), ylim=c(0, 1), width=750),
        head='Medication usage',
        tail='Tick marks indicate mean measurement times within intervals.')

# Show number being followed as days since randomization gets larger
# make sure nriskReport doesn't get fooled by duplicate data
d2 <- rbind(d, d)
require(data.table)

nriskReport(days ~ region + id(subjid),
            data=addMarginal(d2, region),
            head='Number of subjects followed for medication usage')

# Make up some new visits to have more than 1/subj.
# Make up a new definition of time zero
d2$days[(n + 1) : (2 * n)] <- sample(1 : n, n, TRUE)
nriskReport(days ~ treat + id(subjid), data=d2, time0='PCI')
nriskReport(days ~ treat + region, data=d)

# Make a dataset with many records per subject
d <- data.frame(days    = sample(30:365, 1000, replace=TRUE),
                subjid  = sample(1:100,  1000, replace=TRUE))
# Get a richer set of output when there is not stratification
nriskReport(days ~ id(subjid), data=d)

Time to Hospitalization and Surgery

set.seed(1)
n <- 400
dat <- data.frame(t1=runif(n, 2, 5), t2=runif(n, 2, 5),
                  e1=rbinom(n, 1, .5), e2=rbinom(n, 1, .5),
                  cr1=factor(sample(c('cancer','heart','censor'), n, TRUE),
                             c('censor', 'cancer', 'heart')),
                  cr2=factor(sample(c('gastric','diabetic','trauma', 'censor'),
                                    n, TRUE),
                             c('censor', 'diabetic', 'gastric', 'trauma')),
                  treat=sample(c('a','b'), n, TRUE))
dat$t1[1:7] <- NA
dat <- upData(dat,
              labels=c(t1='Time to operation',
                       t2='Time to rehospitalization',
                       e1='Operation', e2='Hospitalization',
                       treat='Treatment'),
              units=c(t1='Year', t2='Year'), print=FALSE)
denom <- c(enrolled=n + 40, randomized=400, a=sum(dat$treat=='a'),
           b=sum(dat$treat=='b'))
sethreportOption(denom=denom, tx.var='treat')
survReport(Surv(t1, e1) + Surv(t2, e2) ~ treat, data=dat)
# Show estimates combining treatments
survReport(Surv(t1, e1) + Surv(t2, e2) ~ 1, data=dat,
           what='S', times=3, ylim=c(.1, 1))

# Cumulative incidence under completing risks
survReport(Surv(t1, cr1) ~ treat, data=dat, cause=c('cancer', 'heart'))

Adverse Events

For this example, the denominators for the two treatments in the pop-up needles will be incorrect because the dataset did not have subject IDs.

# Original source of aeanonym: HH package
# aeanonym <- read.table(hh("datasets/aedotplot.dat"), header=TRUE, sep=",")
# Modified to remove denominators from data and to generate raw data
# (one record per event per subject)

ae <-
structure(list(RAND = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("a", 
"b"), class = "factor"), PREF = structure(c(12L, 12L, 
18L, 18L, 26L, 26L, 33L, 33L, 5L, 5L, 27L, 27L, 6L, 6L, 15L, 
15L, 22L, 22L, 23L, 23L, 31L, 31L, 17L, 17L, 2L, 2L, 3L, 3L, 
13L, 13L, 25L, 25L, 28L, 28L, 14L, 14L, 4L, 4L, 8L, 8L, 19L, 
19L, 21L, 21L, 29L, 29L, 10L, 10L, 20L, 20L, 16L, 16L, 32L, 32L, 
11L, 11L, 1L, 1L, 30L, 30L, 24L, 24L, 9L, 9L, 7L, 7L),
  .Label = tolower(c("ABDOMINAL PAIN", 
"ANOREXIA", "ARTHRALGIA", "BACK PAIN", "BRONCHITIS", "CHEST PAIN", 
"CHRONIC OBSTRUCTIVE AIRWAY", "COUGHING", "DIARRHEA", "DIZZINESS", 
"DYSPEPSIA", "DYSPNEA", "FATIGUE", "FLATULENCE", "GASTROESOPHAGEAL REFLUX", 
"HEADACHE", "HEMATURIA", "HYPERKALEMIA", "INFECTION VIRAL", "INJURY", 
"INSOMNIA", "MELENA", "MYALGIA", "NAUSEA", "PAIN", "RASH", "RESPIRATORY DISORDER", 
"RHINITIS", "SINUSITIS", "UPPER RESP TRACT INFECTION", "URINARY TRACT INFECTION", 
"VOMITING", "WEIGHT DECREASE")), class = "factor"), SAE = c(15L, 
9L, 4L, 9L, 4L, 9L, 2L, 9L, 8L, 11L, 4L, 11L, 9L, 12L, 5L, 12L, 
7L, 12L, 6L, 12L, 6L, 12L, 2L, 14L, 2L, 15L, 1L, 15L, 4L, 16L, 
4L, 17L, 11L, 17L, 6L, 20L, 10L, 23L, 13L, 26L, 12L, 26L, 4L, 
26L, 13L, 28L, 9L, 29L, 12L, 30L, 14L, 36L, 6L, 37L, 8L, 42L, 
20L, 61L, 33L, 68L, 10L, 82L, 23L, 90L, 76L, 95L)), .Names = c("RAND", 
"PREF", "SAE"), class = "data.frame", row.names = c(NA, 
-66L))

subs <- rep(1 : nrow(ae), ae$SAE)
ae <- ae[subs, c('RAND', 'PREF')]
names(ae) <- c('treat', 'event')
label(ae$treat) <- 'Treatment'

denom <- c(enrolled=1000,
           randomized=400,
                     a=212, b=188)

sethreportOption(tx.var='treat', denom=denom)

eReport(event ~ treat, data=ae, minincidence=.05)

To show hazard rate estimates from exponential distributions (events per person time of exposure), make up total exposure time for patients on each treatment, in person-years.

exposure <- c(a=10000, b=9000)
eReport(event ~ treat, exposure=exposure, expunit='year', data=ae)


harrelfe/hreport documentation built on July 26, 2021, 9:09 a.m.