suppressPackageStartupMessages({ library(png) library(grid) })
im = readPNG("images/workshopFace.png") grid.raster(im)
im = readPNG("images/NAStask.png") grid.raster(im)
1) providing code and data and environment to independent parties to diminish risk of analyses that are not reproducible
2) fortifying criteria of statistical soundness of analyses (study interpretations) to control risk of non-replicability of studies
3) doing 1) and 2) in ways that are cost-effective
[R]eproducibility is a property of a study, and replicability is a property of a result that can be proved only by inspecting other results of similar experiments. Therefore, the reproducibility of a result from a single study can be assured, and improving the statistical analysis can enhance its replicability.
My reading:
Extensibility and transportability should not be divorced from reproducibility
Scalability cannot be divorced from reproducbility
Computable documents (rmarkdown, jupyter, ...) are essential elements of cost-effective reproducible research
1) Analysis = software + data + environment + invocations
2) Reproducible analysis = analysis that can be carried out by independent parties
3) Extensible analysis = analysis supporting independent variations
4) Study = Design + implementation + analysis
5) Replicable study = A study that, when executed by independent parties, produces statistically compatible interpretations
1) What is an environment for an analysis?
2) What are independent parties? (See Kahneman's concept of "replication etiquette")
3) What are independent variations on an analysis and why are they important?
4) When are two interpretations of related or identical studies statistically compatible? Reformulate in terms of the standards of replicability for a given field. Example: GWAS catalog concept of replicated finding (see, e.g., P. Kraft et al., Stat. Sci. 2009).
im = readPNG("images/KraftGWASreplication.png") grid.raster(im,height=1.2)
Try to use the same phenotype
Issues:
im = readPNG("images/StregaTitle.png") grid.raster(im)
im = readPNG("images/strega12.png") grid.raster(im)
im = readPNG("images/strega13_16.png") grid.raster(im)
im = readPNG("images/gwascat.png") grid.raster(im)
im = readPNG("images/exacNEJMtitle.png") grid.raster(im)
im = readPNG("images/exacNYTpic.png") grid.raster(im)
im = readPNG("images/exacNYTtext.png") grid.raster(im)
im = readPNG("images/exacUseRecs.png") grid.raster(im)
im = readPNG("images/exacHistosPCA.png") grid.raster(im)
im = readPNG("images/boosStefRP.png") grid.raster(im,width=.95)
im = readPNG("images/johnson1.png") grid.raster(im,width=.99)
im = readPNG("images/johnson2.png") grid.raster(im,width=.99)
im = readPNG("images/ochsCover.png") grid.raster(im,width=.99)
(before)
im = readPNG("images/dressmanAttack56.png") grid.raster(im,width=.99)
im = readPNG("images/dressmanRetraction.png") grid.raster(im,width=.99)
im = readPNG("images/careyStoddenCaseStuds.png") grid.raster(im,width=.99)
Answer: A chapter supported by a Bioconductor package
im = readPNG("images/buildHeader.png") grid.raster(im,width=.99)
im = readPNG("images/dressCheckCI.png") grid.raster(im,width=.99)
psurv = function (x, digits = max(options()$digits - 4, 3), ...) { # # obtain p-value for log-rank test # saveopt <- options(digits = digits) on.exit(options(saveopt)) if (!inherits(x, "survdiff")) stop("Object is not the result of survdiff") if (!is.null(cl <- x$call)) { } omit <- x$na.action if (length(omit)) { } if (length(x$n) == 1) { z <- sign(x$exp - x$obs) * sqrt(x$chisq) temp <- c(x$obs, x$exp, z, signif(1 - pchisq(x$chisq, 1), digits)) names(temp) <- c("Observed", "Expected", "Z", "p") print(temp) } else { if (is.matrix(x$obs)) { otmp <- apply(x$obs, 1, sum) etmp <- apply(x$exp, 1, sum) } else { otmp <- x$obs etmp <- x$exp } df <- (sum(1 * (etmp > 0))) - 1 temp <- cbind(x$n, otmp, etmp, ((otmp - etmp)^2)/etmp, ((otmp - etmp)^2)/diag(x$var)) dimnames(temp) <- list(names(x$n), c("N", "Observed", "Expected", "(O-E)^2/E", "(O-E)^2/V")) uu <- 1 - pchisq(x$chisq, df) } uu } extendDressCheck = function() { suppressPackageStartupMessages({ library(survival) library(shiny) library(dressCheck) library(chron) }) data(E2F3sig.probes) data(Src.probes) tokeep = unique(c(E2F3sig.probes, Src.probes)) if (!exists("corrp116")) data(corrp116) tokeep = intersect(tokeep, rownames(exprs(corrp116))) library(hgu133plus2.db) suppressMessages({ s133 = select(hgu133plus2.db, keys=tokeep, keytype="PROBEID", columns=c("SYMBOL")) }) pid = s133[,1] names(pid) = s133[,2] drop = which(is.na(names(pid))) pid = pid[-drop] pid = pid[order(names(pid))] npid = names(pid) fixup = function(vec) { rgt = c(vec, vec[length(vec)]) lef = c("<", paste(vec, "-", sep="")) lef = sub(paste(vec[length(vec)],"-",sep=""), ">", lef) paste(lef,rgt, sep="") } dateStrat = function(chr, nstrat=3) { ps = (1:nstrat)/nstrat cuts = c(ps[-length(ps)]) qs = quantile(chr, cuts) intervals = fixup(as.character(qs)) tags = rep(" ", length(chr)) for (i in 1:length(qs)) tags[ which(chr < qs[i] & tags == " ") ] = intervals[i] tags[ which(chr >= qs[i]) ] = intervals[length(qs)+1] ordered(tags, levels=intervals) } ui = fluidPage( sidebarPanel( helpText("Interactive analysis of Dressman et al. JCO 2007 data as discussed in Carey and Stodden 2010"), helpText(" "), helpText("Select a gene for plotting expression against sample date"), selectizeInput("insym", "input", choices=npid, selected="RPS11"), helpText("Pick a number of date strata for logrank/K-M plots (for platinum non-responders)"), radioButtons("numstrat", "# date strata", choices=2:5, selected=2) ), mainPanel( tabsetPanel( tabPanel("Expr Confounding", plotOutput("confplot")), tabPanel("Surv Confounding", plotOutput("survplot")) ) ) ) server = function(input, output, session) { output$chk = renderText({input$insym}) output$confplot = renderPlot( { an = as.numeric probe = pid[input$insym] plot(an(exprs(corrp116[probe,]))~chron(corrp116$rundate), main="(a)", xlab="array run date", ylab=paste("RMA+SFR expression of", input$insym)) } ) output$survplot = renderPlot( { num = as.numeric(input$numstrat) CC = cut(chron(corrp116$rundate), num) #input$numstrat) dstra = dateStrat(chron(corrp116$rundate), num) with(pData(corrp116), d0 <<- survdiff(Surv(Survival, dead)~dstra, subset=CR==0)) with(pData(corrp116), plot(survfit(Surv(Survival, dead)~dstra, subset=CR==0),col=1:num, lwd=3, xlab="Months", ylab="survival (%)", main="(b)")) text(37,.03, paste("logrank p=", round(psurv(d0),3))) legend(70, .8, lty=1, lwd=3, col=1:num, legend=levels(dstra)) } ) } shinyApp(ui, server) } extendDressCheck()
Statistical analysis of the RAVE study (NEJM 2013) includes a link to figures and code
visit and clone github.com/vjcitn/barca with an eye towards contributing unit tests
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.