require(hreport) # add to above: results='hide' knitrSet(lang='markdown', fig.path='figure/') options(prType='html') mu <- markupSpecs$html # in Hmisc - HTML markups frac <- mu$frac
mu$styles() # define HTML styles, functions
## Generate test data set.seed(1) n <- 500 d <- data.frame(country=sample(c('US', 'Canada', 'Spain', 'France', 'Germany'), n, TRUE), site=sample(1:10, n, TRUE)) d$site <- paste(substring(d$country, 1, 2), d$site, sep='') d$region <- factor(ifelse(d$country %in% c('US', 'Canada'), 'North America', 'Europe')) d <- upData(d, edate = as.Date('2005-01-01') + round(rgamma(n, 2, .01)) - 600 * (country == 'US'), rdate = edate + round(runif(n, 1, 30)), print=FALSE) d$rdate[runif(nrow(d)) < 0.5] <- NA # non-randomized subjects ) # with(d, table(region, country)) # For US manually compute # randomized per month us <- subset(d, country == 'US') site <- us$site ed <- us$edate rd <- us$rdate months <- difftime(as.Date('2007-12-31'), ed, units='days') / (365.25 / 12) m <- max(months) a <- sum(!is.na(rd)) / as.numeric(m) # .8545774 (agrees with chart) # Compute maximum months elapsed for each site then sum over sites maxpersite <- tapply(months, site, max) b <- sum(!is.na(rd)) / sum(maxpersite) ## 0.0864429 = 47 / 543.6715 chart: .08645 (rounded) ## Suppose there are more subjects enrolled and randomized than really ## made their way into the dataset denom <- c(enrolled=nrow(d) * 1.1, randomized=sum(!is.na(d$rdate)) + 10) sethreportOption(tx.var='treat', denom=denom) ## Initialize file to hold appendix information such as subject IDs ## so all later writing to this file can use append=TRUE
Most of the graphs produced here are semi-interactive. One can hover over elements of graphs with the mouse to have detailed information pop up.
Needles represent the fraction of observations used in the current
analysis. The first needle (red) shows the fraction of enrolled
patients used. If randomization was taken into account, a second
needle (green) represents the fraction of randomized subjects included
in the analysis. When the analyses consider treatment assignment, two
more needles may be added to the display, showing, respectively, the
fraction of subjects randomized to treatment A used in the analysis
and the fraction of subjects on treatment B who were analyzed. The
colors of these last two needles are the colors used for the two
treatments throughout the report. The following table shows some
examples. dNeedle
uses colors in sethreportOption(tx.col=, er.col=)
.
# Store using short variable names so Rmarkdown table column # width will not be wider than actually needed d1 <- dNeedle(1) d2 <- dNeedle((3:4)/4) d3 <- dNeedle((1:2)/4) d4 <- dNeedle(c(1,2,3,1)/4)
|Signpost | Interpretation |
|------- | -------------------------------------------------|
| r d1
| All enrolled subjects analyzed, randomization not considered|
| r d2
| Analysis uses r frac(3,4)
of enrolled subjects, and all randomized subjects|
| r d3
| Analysis uses r frac(1,4)
of enrolled subjects, and r frac(1,2)
of randomized subjects|
| r d4
| Same as previous example, and in addition the analysis utilized treatment assignment, analyzing r frac(3,4)
of those randomized to A and r frac(1,4)
of those randomized to B|
Graphs containing pairs of Kaplan-Meier survival curves show a shaded region centered at the midpoint of the two survival estimates and having a height equal to the half-width of the approximate 0.95 pointwise confidence interval for the difference of the two survival probabilities. Time points at which the two survival estimates do not touch the shaded region denote approximately significantly different survival estimates, without any multiplicity correction.
accrualReport(enroll(edate) + randomize(rdate) ~ region(region) + country(country) + site(site), data=d, dateRange=c('2005-01-01', '2007-12-31'), targetN= data.frame(edate=c(500, 1000), rdate=c(250, 500)), targetDate=c('2006-01-01', '2007-12-31'), closeDate='2007-12-31')
f <- 0.05 d <- upData(d, subjid = 1 : n, pend = rbinom(n, 1, .1), e1 = rbinom(n, 1, f), e2 = rbinom(n, 1, f), e3 = rbinom(n, 1, f), e4 = ifelse(runif(n) < 0.25, NA, rbinom(n, 1, .10)), tested = rbinom(n, 1, .75), e5 = ifelse(tested, rbinom(n, 1, .04), NA), e6 = rbinom(n, 1, f), e7 = rbinom(n, 1, f), rndz = rbinom(n, 1, .75), labels=c(e1='Prior MI', e2='History of Asthma', e3='History of Upper GI Bleeding', e4='No Significant CAD', e5='Inadequate Renal Function', e6='Pneumonia within 6 weeks', e7='Prior cardiac surgery'), print=FALSE) erd <- data.frame(subjid = 1 : 50, loc = sample(c('gastric', 'lung', 'trachea'), 50, TRUE)) # To check warning messages, greportOption denom does not match pend, e1-e7 options(dumpfile='/tmp/z') # for seeing frequencies of all combinations exReport(~ pending(pend) + e1 + e2 + e3 + e4 + e5 + e6 + e7 + randomized(rndz) + id(subjid) + cond(e5, 'Tested', tested), erdata = erd, whenapp= c(e4='CCTA done'), data=d) #, hc=3.75, h=4) # Show exclusions in original variable order if(FALSE) exReport(~ pending(pend) + e1 + e2 + e3 + e4 + e5 + e6 + e7 + randomized(rndz) + id(subjid) + cond(e5, 'Tested', tested), erdata=erd, whenapp=c(e4='CCTA done'), data=d, #hc=3.75, h=4, sort=FALSE, app=FALSE)
n <- 100 f <- function(na=FALSE) { x <- sample(c('N', 'Y'), n, TRUE) if(na) x[runif(100) < .1] <- NA x } set.seed(1) outs <- c('home', 'hospitalized', 'organ support', 'dead') d <- data.frame(x1=f(), x2=f(), x3=f(), x4=f(), x5=f(), x6=f(), x7=f(TRUE), age=rnorm(n, 50, 10), sbp=rnorm(n, 120, 7), dbp=rnorm(n, 80, 6), days=sample(1:n, n, TRUE), race=sample(c('Asian', 'Black/AA', 'White'), n, TRUE), sex=sample(c('Female', 'Male'), n, TRUE), treat=sample(c('A', 'B'), n, TRUE), region=sample(c('North America','Europe'), n, TRUE), meda=sample(0:1, n, TRUE), medb=sample(0:1, n, TRUE), outcome=factor(sample(1:4, n, TRUE), 1:4, outs), subjid=1:n) d$days[1] <- NA d$race[1:10] <- NA d <- upData(d, labels=c(x1='MI', x2='Stroke', x3='AKI', x4='Migraines', x5='Pregnant', x6='Other event', x7='MD withdrawal', race='Race', sex='Sex', treat='treatment', sbp='Systolic BP', days='Time Since Randomization', meda='Medication A', medb='Medication B', outcome='Ordinal Outcome'), units=c(sbp='mmHg', dbp='mmHg', age='years', days='days'), print=FALSE) dasna <- subset(d, region=='North America') # with(dasna, table(race, treat)) den <- c(enrolled=n + 50, randomized=n, table(d$treat)) sethreportOption(denom=den, tx.var='treat') dReport(race + sex + ynbind(x1, x2, x3, x4, x5, x6, x7, label='Exclusions') ~ 1, head='Overall frequencies of categorical demographic variables and exclusions', data=d, w=4, h=4.5) dReport(race + sex ~ treat, groups='treat', head='Categorical demographic variables', data=d, w=4, h=4.5) dReport(race + sex ~ region, data=addMarginal(d, region), w=4.75, h=3.75, head='Demographics') dReport(race + sex ~ region + treat, data=addMarginal(d, region), groups='treat', w=4.75, h=3.75, head='Demographics') ## Add a new block of variables that apply only to males dReport(race + sex + pBlock(race, subset=sex=='Male', label='Race: Males') ~ region, data=d, groups='region', head='Demographics with race for males') # Show raw data and smoothed relationship between age and sbp, stratified. dReport(sbp ~ age + treat, groups='treat', data=d, popts=list(fitter=loess, showpts=TRUE), what='xy') # Show ECDFs of sbp and age stratified by treatment+reference lines at quartiles dReport(sbp + age ~ treat, groups='treat', what='ecdf', data=d, sopts=list(q=(1:3)/4, ncols=2, width=750)) # Show means and confidence intervals by treatment as dot chart f <- function(x) { x <- x[! is.na(x)] c(smean.cl.normal(x, na.rm=FALSE), n=length(x)) } dReport(sbp ~ treat, data=d, fun = f, head='Mean and confidence limits') # popts = list(textplot='Mean', digits=1),
dReport(outcome ~ treat + sex, groups='treat', what='stacked', data=d, w=800, h=400)
dReport(meda + medb ~ days + treat, what='xy', groups='treat', data=d, popts=list(fitter=loess, xlim=c(0, 130), ylim=c(0, 1), width=750), head='Medication usage', tail='Tick marks indicate mean measurement times within intervals.') # Show number being followed as days since randomization gets larger # make sure nriskReport doesn't get fooled by duplicate data d2 <- rbind(d, d) require(data.table) nriskReport(days ~ region + id(subjid), data=addMarginal(d2, region), head='Number of subjects followed for medication usage') # Make up some new visits to have more than 1/subj. # Make up a new definition of time zero d2$days[(n + 1) : (2 * n)] <- sample(1 : n, n, TRUE) nriskReport(days ~ treat + id(subjid), data=d2, time0='PCI') nriskReport(days ~ treat + region, data=d) # Make a dataset with many records per subject d <- data.frame(days = sample(30:365, 1000, replace=TRUE), subjid = sample(1:100, 1000, replace=TRUE)) # Get a richer set of output when there is not stratification nriskReport(days ~ id(subjid), data=d)
set.seed(1) n <- 400 dat <- data.frame(t1=runif(n, 2, 5), t2=runif(n, 2, 5), e1=rbinom(n, 1, .5), e2=rbinom(n, 1, .5), cr1=factor(sample(c('cancer','heart','censor'), n, TRUE), c('censor', 'cancer', 'heart')), cr2=factor(sample(c('gastric','diabetic','trauma', 'censor'), n, TRUE), c('censor', 'diabetic', 'gastric', 'trauma')), treat=sample(c('a','b'), n, TRUE)) dat$t1[1:7] <- NA dat <- upData(dat, labels=c(t1='Time to operation', t2='Time to rehospitalization', e1='Operation', e2='Hospitalization', treat='Treatment'), units=c(t1='Year', t2='Year'), print=FALSE) denom <- c(enrolled=n + 40, randomized=400, a=sum(dat$treat=='a'), b=sum(dat$treat=='b')) sethreportOption(denom=denom, tx.var='treat') survReport(Surv(t1, e1) + Surv(t2, e2) ~ treat, data=dat) # Show estimates combining treatments survReport(Surv(t1, e1) + Surv(t2, e2) ~ 1, data=dat, what='S', times=3, ylim=c(.1, 1)) # Cumulative incidence under completing risks survReport(Surv(t1, cr1) ~ treat, data=dat, cause=c('cancer', 'heart'))
For this example, the denominators for the two treatments in the pop-up needles will be incorrect because the dataset did not have subject IDs.
# Original source of aeanonym: HH package # aeanonym <- read.table(hh("datasets/aedotplot.dat"), header=TRUE, sep=",") # Modified to remove denominators from data and to generate raw data # (one record per event per subject) ae <- structure(list(RAND = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("a", "b"), class = "factor"), PREF = structure(c(12L, 12L, 18L, 18L, 26L, 26L, 33L, 33L, 5L, 5L, 27L, 27L, 6L, 6L, 15L, 15L, 22L, 22L, 23L, 23L, 31L, 31L, 17L, 17L, 2L, 2L, 3L, 3L, 13L, 13L, 25L, 25L, 28L, 28L, 14L, 14L, 4L, 4L, 8L, 8L, 19L, 19L, 21L, 21L, 29L, 29L, 10L, 10L, 20L, 20L, 16L, 16L, 32L, 32L, 11L, 11L, 1L, 1L, 30L, 30L, 24L, 24L, 9L, 9L, 7L, 7L), .Label = tolower(c("ABDOMINAL PAIN", "ANOREXIA", "ARTHRALGIA", "BACK PAIN", "BRONCHITIS", "CHEST PAIN", "CHRONIC OBSTRUCTIVE AIRWAY", "COUGHING", "DIARRHEA", "DIZZINESS", "DYSPEPSIA", "DYSPNEA", "FATIGUE", "FLATULENCE", "GASTROESOPHAGEAL REFLUX", "HEADACHE", "HEMATURIA", "HYPERKALEMIA", "INFECTION VIRAL", "INJURY", "INSOMNIA", "MELENA", "MYALGIA", "NAUSEA", "PAIN", "RASH", "RESPIRATORY DISORDER", "RHINITIS", "SINUSITIS", "UPPER RESP TRACT INFECTION", "URINARY TRACT INFECTION", "VOMITING", "WEIGHT DECREASE")), class = "factor"), SAE = c(15L, 9L, 4L, 9L, 4L, 9L, 2L, 9L, 8L, 11L, 4L, 11L, 9L, 12L, 5L, 12L, 7L, 12L, 6L, 12L, 6L, 12L, 2L, 14L, 2L, 15L, 1L, 15L, 4L, 16L, 4L, 17L, 11L, 17L, 6L, 20L, 10L, 23L, 13L, 26L, 12L, 26L, 4L, 26L, 13L, 28L, 9L, 29L, 12L, 30L, 14L, 36L, 6L, 37L, 8L, 42L, 20L, 61L, 33L, 68L, 10L, 82L, 23L, 90L, 76L, 95L)), .Names = c("RAND", "PREF", "SAE"), class = "data.frame", row.names = c(NA, -66L)) subs <- rep(1 : nrow(ae), ae$SAE) ae <- ae[subs, c('RAND', 'PREF')] names(ae) <- c('treat', 'event') label(ae$treat) <- 'Treatment' denom <- c(enrolled=1000, randomized=400, a=212, b=188) sethreportOption(tx.var='treat', denom=denom) eReport(event ~ treat, data=ae, minincidence=.05)
To show hazard rate estimates from exponential distributions (events per person time of exposure), make up total exposure time for patients on each treatment, in person-years.
exposure <- c(a=10000, b=9000) eReport(event ~ treat, exposure=exposure, expunit='year', data=ae)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.