suppressPackageStartupMessages({
library(png)
library(grid)
})

Resource

im = readPNG("images/workshopFace.png")
grid.raster(im)

Task

im = readPNG("images/NAStask.png")
grid.raster(im)

The three themes underlying reproducibility research

1) providing code and data and environment to independent parties to diminish risk of analyses that are not reproducible

2) fortifying criteria of statistical soundness of analyses (study interpretations) to control risk of non-replicability of studies

3) doing 1) and 2) in ways that are cost-effective

Y. Benjamini, NAS workshop p. 47

[R]eproducibility is a property of a study, and replicability is a property of a result that can be proved only by inspecting other results of similar experiments. Therefore, the reproducibility of a result from a single study can be assured, and improving the statistical analysis can enhance its replicability.

My reading:

Road map of talk

A personal view

Basic definitions

1) Analysis = software + data + environment + invocations

2) Reproducible analysis = analysis that can be carried out by independent parties

3) Extensible analysis = analysis supporting independent variations

4) Study = Design + implementation + analysis

5) Replicable study = A study that, when executed by independent parties, produces statistically compatible interpretations

Open questions

1) What is an environment for an analysis?

2) What are independent parties? (See Kahneman's concept of "replication etiquette")

3) What are independent variations on an analysis and why are they important?

4) When are two interpretations of related or identical studies statistically compatible? Reformulate in terms of the standards of replicability for a given field. Example: GWAS catalog concept of replicated finding (see, e.g., P. Kraft et al., Stat. Sci. 2009).

GWAS: p-values are not created equal

im = readPNG("images/KraftGWASreplication.png")
grid.raster(im,height=1.2)

GWAS replication recommendations in Kraft et al. 2009

STREGA: genetic association reporting guidelines

im = readPNG("images/StregaTitle.png")
grid.raster(im)

Guideline 12

im = readPNG("images/strega12.png")
grid.raster(im)

Guidelines 13-16

im = readPNG("images/strega13_16.png")
grid.raster(im)

Upshots

GWAS catalog eligibility

im = readPNG("images/gwascat.png")
grid.raster(im)

Genetic medical counseling errors

im = readPNG("images/exacNEJMtitle.png")
grid.raster(im)

Headline

im = readPNG("images/exacNYTpic.png")
grid.raster(im)

Consequences of error

im = readPNG("images/exacNYTtext.png")
grid.raster(im)

Moving forward

im = readPNG("images/exacUseRecs.png")
grid.raster(im)

A view of Exome Aggregation Consortium

im = readPNG("images/exacHistosPCA.png")
grid.raster(im)

Upshots

Some material from the NAS workshop: Benjamini

Some material from the NAS workshop: Boos

Boos: Replication probabilities are low for conventional thresholds

im = readPNG("images/boosStefRP.png")
grid.raster(im,width=.95)

Some material from the NAS workshop: Valen Johnson (PNAS paper)

im = readPNG("images/johnson1.png")
grid.raster(im,width=.99)

Costs of more stringent thresholds: $N_{strong} > 1.7 \times N_{conventional}$

im = readPNG("images/johnson2.png")
grid.raster(im,width=.99)

Upshots

Transition: Some case studies in reproducibility

dressCheck: a package that backs up a book chapter

im = readPNG("images/ochsCover.png")
grid.raster(im,width=.99)

Discuss: Rebutting a rebuttal

(before)

im = readPNG("images/dressmanAttack56.png")
grid.raster(im,width=.99)

(after)

im = readPNG("images/dressmanRetraction.png")
grid.raster(im,width=.99)

Starting out

im = readPNG("images/careyStoddenCaseStuds.png")
grid.raster(im,width=.99)

Basic questions

Answer: A chapter supported by a Bioconductor package

Build reports for ExperimentData packages

im = readPNG("images/buildHeader.png")
grid.raster(im,width=.99)

checking dressCheck

im = readPNG("images/dressCheckCI.png")
grid.raster(im,width=.99)

How to proceed

Beyond selected static graphics

psurv = function (x, digits = max(options()$digits - 4, 3), ...) 
{
#
# obtain p-value for log-rank test
#
    saveopt <- options(digits = digits)
    on.exit(options(saveopt))
    if (!inherits(x, "survdiff")) 
        stop("Object is not the result of survdiff")
    if (!is.null(cl <- x$call)) {
    }
    omit <- x$na.action
    if (length(omit)) {
    }
    if (length(x$n) == 1) {
        z <- sign(x$exp - x$obs) * sqrt(x$chisq)
        temp <- c(x$obs, x$exp, z, signif(1 - pchisq(x$chisq, 
            1), digits))
        names(temp) <- c("Observed", "Expected", "Z", "p")
        print(temp)
    }
    else {
        if (is.matrix(x$obs)) {
            otmp <- apply(x$obs, 1, sum)
            etmp <- apply(x$exp, 1, sum)
        }
        else {
            otmp <- x$obs
            etmp <- x$exp
        }
        df <- (sum(1 * (etmp > 0))) - 1
        temp <- cbind(x$n, otmp, etmp, ((otmp - etmp)^2)/etmp, 
            ((otmp - etmp)^2)/diag(x$var))
        dimnames(temp) <- list(names(x$n), c("N", "Observed", 
            "Expected", "(O-E)^2/E", "(O-E)^2/V"))
        uu <- 1 - pchisq(x$chisq, df)
    }
    uu
}

extendDressCheck = function() {
suppressPackageStartupMessages({
  library(survival)
  library(shiny)
  library(dressCheck)
  library(chron)
})
  data(E2F3sig.probes)
  data(Src.probes)
  tokeep = unique(c(E2F3sig.probes, Src.probes))
  if (!exists("corrp116")) data(corrp116)
  tokeep = intersect(tokeep, rownames(exprs(corrp116)))
  library(hgu133plus2.db)
suppressMessages({
  s133 = select(hgu133plus2.db, 
     keys=tokeep, keytype="PROBEID", columns=c("SYMBOL")) 
})
  pid = s133[,1]
  names(pid) = s133[,2]
  drop = which(is.na(names(pid)))
  pid = pid[-drop]
  pid = pid[order(names(pid))]
  npid = names(pid)

fixup = function(vec) {
  rgt = c(vec, vec[length(vec)])
  lef = c("<", paste(vec, "-", sep=""))
  lef = sub(paste(vec[length(vec)],"-",sep=""), ">", lef)
  paste(lef,rgt, sep="")
}

dateStrat = function(chr, nstrat=3) {
 ps = (1:nstrat)/nstrat
 cuts = c(ps[-length(ps)])
 qs = quantile(chr, cuts)
 intervals = fixup(as.character(qs))
 tags = rep(" ", length(chr))
 for (i in 1:length(qs)) 
  tags[ which(chr < qs[i] & tags == " ") ] = intervals[i]
 tags[ which(chr >= qs[i]) ] = intervals[length(qs)+1]
 ordered(tags, levels=intervals)
}

  ui = fluidPage(
        sidebarPanel(
         helpText("Interactive analysis of Dressman et al. JCO 2007 data as discussed in Carey and Stodden 2010"),
         helpText(" "),
         helpText("Select a gene for plotting expression against sample date"),
         selectizeInput("insym", "input", choices=npid, selected="RPS11"),
         helpText("Pick a number of date strata for logrank/K-M plots (for platinum non-responders)"),
         radioButtons("numstrat", "# date strata", choices=2:5, selected=2)
        ),
        mainPanel(
         tabsetPanel(
          tabPanel("Expr Confounding", plotOutput("confplot")),
          tabPanel("Surv Confounding", plotOutput("survplot"))
         )
        )
       )
  server = function(input, output, session) {
    output$chk = renderText({input$insym})
    output$confplot = renderPlot( {
               an = as.numeric
               probe = pid[input$insym]
               plot(an(exprs(corrp116[probe,]))~chron(corrp116$rundate), main="(a)",
        xlab="array run date", ylab=paste("RMA+SFR expression of", input$insym))
               } )
    output$survplot = renderPlot( {
        num = as.numeric(input$numstrat)
        CC = cut(chron(corrp116$rundate), num) #input$numstrat)
        dstra = dateStrat(chron(corrp116$rundate), num)
        with(pData(corrp116), d0 <<- survdiff(Surv(Survival, dead)~dstra,
        subset=CR==0))

        with(pData(corrp116), plot(survfit(Surv(Survival, dead)~dstra,
        subset=CR==0),col=1:num, lwd=3,
        xlab="Months", ylab="survival (%)", main="(b)"))
        text(37,.03, paste("logrank p=", round(psurv(d0),3)))
        legend(70, .8, lty=1, lwd=3, col=1:num, legend=levels(dstra))
        } )
  }
  shinyApp(ui, server)
}
extendDressCheck()

Upshots

Bridging from publication to data

Statistical analysis of the RAVE study (NEJM 2013) includes a link to figures and code

Using github and travis-ci

visit and clone github.com/vjcitn/barca with an eye towards contributing unit tests

Conclusions



vjcitn/Repro2017 documentation built on May 3, 2019, 6:13 p.m.