############################################################
# RPP Manuscript Figure 1,2,3 #
# #
# Created by Fred Hasselman on behalf of the RPP intiative #
############################################################
# All the custom functions that are required to recreate the Figures are available in a GitHub sourceable file: `C-3PR.R`
# It is available here: https://github.com/FredHasselman/toolboxR
############################################################
# C-3PR: Evaluating Scientific *C*laims: *P*ost *P*ublication *P*eer *R*eview, *R*eplication and *R*eproduction.
#
# An R function library created in the contex of analysing and visualising data generated by several Open Science intiatives.
#
# C-3PR Created by [Fred Hasselman](https://osf.io/ujgs6/) on behalf of the ManyLabs1, ManyLabs2, RPP and Curate Science projects
############################################################
# SETUP local R-------------------------------------------------------------------
# Use this code (from the devtools package) to source C-3PR directly from GitHub:
require(devtools)
source_url('https://raw.githubusercontent.com/FredHasselman/toolboxR/master/C-3PR.R')
# This will load and (if necessary) install libraries frequently used for data management and plotting
in.IT(c('ggplot2','RColorBrewer','lattice','gridExtra','plyr','dplyr','httr'))
# Read the data from the OSF storage
# Note: get.OSFfile() returns a list with the .csv data (df) and information (info) containing the URL download timestamp and original column and rownames (these names will be changed if dfCln=TRUE).
RPPdata <- get.OSFfile(code='https://osf.io/fgjvw/',dfCln=T)$df
## If you dowloaded the csv file to your harddrive use this code:
RPPdata<-read.csv('rpp_data.csv',stringsAsFactors=F )
RPPdata<-df.Clean(RPPdata)
RPPdata<-RPPdata$df
sortPvals <- sort(RPPdata$T.pval.recalc.O)
sum(sortPvals[1:(length(sortPvals)-1)] == sortPvals[2:length(sortPvals)])
which(sortPvals[1:(length(sortPvals)-1)] == sortPvals[2:length(sortPvals)])
# Select the completed replication studies
RPPdata <- dplyr::filter(RPPdata, !is.na(T.pval.USE.O), !is.na(T.pval.USE.R))
# We have 99 studies for which p-values and effect sizes could be calculated
nrow(RPPdata)
# We have 97 studies for which p-values of the original effect were significantly below .05
idOK <- complete.cases(RPPdata$T.r.O,RPPdata$T.r.R)
sum(idOK)
# Get ggplot2 themes predefined in C-3PR
mytheme <- gg.theme("clean")
# Get the Replication observed power
RPPdata$Power.Rn <- as.numeric(RPPdata$Power.R)
########################
# FIGURE 3
# EFFECT SIZE DENSITY PLOTS -------------------------------------------------------------
########################
# Setup some variables
RPPdata$oriSig <- "Not Significant"
# 3 studies claimed an effect at .05 < p < .06
RPPdata$oriSig[RPPdata$T.pval.USE.O<=.06] <- "Significant"
RPPdata$oriSig <- factor(RPPdata$oriSig)
RPPdata$repSig <- "Not Significant"
RPPdata$repSig[RPPdata$T.pval.USE.R<=.05] <- "Significant"
RPPdata$repSig <- factor(RPPdata$repSig)
RPPdata$repSig <- factor(RPPdata$repSig)
# ORIGINAL PLOT ------------------------
# The plotHolder() function from C-3PR creates a blank plot template that will hold the figures
blankPlot <- plotHolder()
# X margin density plot (note: gg.theme() from C-3PR can be used directly in a ggplot2() call)
xDense <- ggplot(RPPdata, aes(x=T.r.O, fill=oriSig)) +
geom_density(aes(y= ..count..),trim=F,alpha=.5) +
xlab("") + ylab("") + xlim(0,1) +
gg.theme("noax") +
theme(legend.position = "none",plot.margin = unit(c(0,0,0,4), "lines"))
## Uncomment to save subplot
# ggsave("RPP_F3_xDense.png",plot=xDense)
# Y margin density plot (note: gg.theme() from C-3PR can be used directly in a ggplot2() call)
yDense <- ggplot(RPPdata, aes(x=T.r.R, fill=repSig)) +
geom_density(aes(y= ..count..),trim=F,alpha=.5) +
xlab("") + ylab("") + xlim(-.5,1) +
coord_flip() +
gg.theme("noax") +
theme(legend.position = "none", plot.margin = unit(c(0,0,3,0), "lines"))
## Uncomment to save subplot
# ggsave("RPP_F3_yDense.png",plot=yDense)
# The main scatterplot (note: gg.theme() from C-3PR can be used directly in a ggplot2() call)
scatterP<-
ggplot(RPPdata,aes(x=T.r.O,y=T.r.R)) +
geom_hline(aes(yintercept=0),linetype=2) +
geom_abline(intercept=0,slope=1,color="Grey60")+
geom_point(aes(size=Power.Rn,fill=repSig),color="Grey30",shape=21,alpha=.8) +
geom_rug(aes(color=oriSig),size=1,sides="b",alpha=.6) +
geom_rug(aes(color=repSig),size=1,sides="l",alpha=.6) +
scale_x_continuous(name="Original Effect Size",limits=c(0,1),breaks=c(0,.25,.5,.75,1)) +
scale_y_continuous(name="Replication Effect Size",limits=c(-.5,1),breaks=c(-.5,-.25,0,.25,.5,.75,1)) +
ggtitle("") + xlab("") + ylab("") +
scale_size_continuous(name="Replication Power",range=c(2,9)) +
scale_color_discrete(name="p-value") +
scale_fill_discrete(name="p-value") +
gg.theme("clean") +
theme(legend.position=c(.9,.6), plot.margin = unit(c(-2,-1.5,2,2), "lines"))
## Uncomment to save subplot
# ggsave("RPP_F3_scatter.png",plot=scatterP)
# Yet another way to organise plots: grid.arrange() from the gridExtra package.
grid.arrange(xDense, blankPlot, scatterP, yDense, ncol=2, nrow=2, widths=c(4, 1.4), heights=c(1.4, 4))
# SAVE combined plots as PDF
pdf("RPP_Figure3_original.pdf",pagecentre=T, width=15,height=12 ,paper = "special")
grid.arrange(xDense, blankPlot, scatterP, yDense, ncol=2, nrow=2, widths=c(4, 1.4), heights=c(1.4, 4))
dev.off()
# STATISTIC BASED FIGURE 3 -------------------------------
source("t-correction.R")
source("F-correction.R")
source("chisq-correction.R")
# Performing conditional inference on test statistics
naIndex <- !is.na(RPPdata$T.r.O)
statistics <- RPPdata$T.Test.value.O
statType <- RPPdata$T.Test.Statistic.O
conditionalList <- lapply(1:length(statType), function(x) x)
i <- 1
while(i <= length(conditionalList)) {
if((is.na(RPPdata$T.pval.recalc.O[i]) |
!naIndex[i]) &
i != 80) {
i <- i + 1
next
}
if(i == 80) {
alpha.threshold <- 0.05
} else if(RPPdata$T.pval.recalc.O[i] >= 0.06) {
alpha.threshold <- 1
} else if(RPPdata$T.pval.recalc.O[i] >= 0.05) {
alpha.threshold <- 0.06
} else {
alpha.threshold <- 0.05
}
if(any(statType[i] %in% c("z", "t", "r"))) {
if(statType[i] == "t") {
df <- RPPdata$T.df2.O[i]
t <- statistics[i]
if(df > 90) df <- 100
} else if(statType[i] == "z") {
df <- Inf
t <- statistics[i]
} else if(statType[i] == "r") {
df <- Inf
r <- statistics[i]
t <- 0.5 * log((1 + r) / (1 - r))
t <- t / sqrt(RPPdata$T.N.O[i] - 3)^-1
}
conditionalList[[i]] <- t.selection.correction(t, df, alpha.threshold, 0.05)
} else if(any(statType[i] %in% c("Chi", "Chi2"))) {
df <- RPPdata$T.df1.O[i]
chi <- statistics[i]
conditionalList[[i]] <- chisq.selection.correction(chi, df, alpha.threshold, 0.05)
} else if(statType[i] == "F") {
df1 <- RPPdata$T.df1.O[i]
df2 <- RPPdata$T.df2.O[i]
Fval <- statistics[i]
if(df1 == 1) {
statType[i] <- "t"
statistics[i] <- sqrt(statistics[i])
next
}
conditionalList[[i]] <- Fdist.selection.correction(Fval, df1, df2, alpha.threshold, 0.05)
}
print(i)
i <- i + 1
}
# Converting conditional test statistics to correlations
F2cor <- function(Fval, df1, df2) {
sqrt((Fval * (df1 / df2)) / (((Fval * df1) / df2) + 1)) * sqrt(1 / df1)
}
correctedCor <- rep(NA, length(statistics))
lowerCI <- rep(NA, length(statistics))
upperCI <- rep(NA, length(statistics))
N <- RPPdata$T.N.O
for(i in 1:length(correctedCor)) {
if(!naIndex[i]) next
if(is.na(RPPdata$T.pval.recalc.O[i]) & i != 80) next
conditional <- conditionalList[[i]][[3]]
lower <- conditionalList[[i]][[5]][1]
upper <- conditionalList[[i]][[5]][2]
if(any(statType[i] %in% c("z", "r"))) {
n <- N[i]
z <- conditional * sqrt(n - 3)^-1
correctedCor[i] <- z2cor(z, n)
lowerCI[i] <- z2cor(lower, n)
upperCI[i] <- z2cor(upper, n)
}
if(any(statType[i] %in% c("Chi", "Chi2"))) {
n <- N[i]
correctedCor[i] <- sqrt(conditional / n)
lowerCI[i] <- sqrt(lower / n)
upperCI[i] <- min(sqrt(upper / n), 1)
}
if(statType[[i]] == "t") {
conditional <- conditional^2
df1 <- 1
df2 <- RPPdata$T.df2.O[i]
signLower <- sign(lower)
lower <- lower^2
upper <- upper^2
correctedCor[i] <- F2cor(conditional, df1, df2)
lowerCI[i] <- F2cor(lower, df1, df2) * signLower
upperCI[i] <- F2cor(upper, df1, df2)
}
if(statType[i] == "F") {
df1 <- RPPdata$T.df1.O[i]
df2 <- RPPdata$T.df2.O[i]
correctedCor[i] <- F2cor(conditional, df1, df2)
lowerCI[i] <- F2cor(lower, df1, df2) * signLower
upperCI[i] <- F2cor(upper, df1, df2)
}
}
# non-significant corrected equals original non-significant
originalPvals <- RPPdata$T.pval.recalc.O
originalPvals[is.na(originalPvals)] <- 1
correctedCor[originalPvals >= 0.06] <- RPPdata$T.r.O[originalPvals >= 0.06]
RPPdata$correctedScor <- correctedCor
# Recalculating power
correctedPower <- rep(NA, length(correctedCor))
for(i in 1:length(correctedCor)) {
if(length(conditionalList[[i]]) == 1) next
if(class(conditionalList[[i]]) == "t.correction") {
power <- calculate.power(conditionalList[[i]], newDF = RPPdata$T.N.R[i] - 1)[[2]]
} else if(class(conditionalList[[i]]) == "chisq.correction") {
power <- calculate.power(conditionalList[[i]],
originalN = RPPdata$T.N.O[i],
newN = RPPdata$T.N.R[i])[[2]]
} else if(class(conditionalList[[i]]) == "Fval.correction") {
power <- calculate.power(conditionalList[[i]],
newN = RPPdata$T.N.R[i])[[2]]
}
correctedPower[i] <- power
}
correctedPower[is.na(correctedPower)] <- RPPdata$Power.Rn[is.na(correctedPower)]
RPPdata$correctedPower <- correctedPower
RPPdata$lowerCI <- lowerCI
RPPdata$upperCI <- upperCI
xDense <- ggplot(RPPdata, aes(x=correctedScor, fill=oriSig)) +
geom_density(aes(y= ..count..),trim=F,alpha=.5) +
xlab("") + ylab("") + xlim(0,1) +
gg.theme("noax") +
theme(legend.position = "none",plot.margin = unit(c(0,0,0,4), "lines"))
# Y margin density plot (note: gg.theme() from C-3PR can be used directly in a ggplot2() call)
yDense <- ggplot(RPPdata, aes(x=T.r.R, fill=repSig)) +
geom_density(aes(y= ..count..),trim=F,alpha=.5) +
xlab("") + ylab("") + xlim(-.5,1) +
coord_flip() +
gg.theme("noax") +
theme(legend.position = "none", plot.margin = unit(c(0,0,3,0), "lines"))
# STAT_CORRECTION ORIGINAL
# The main scatterplot (note: gg.theme() from C-3PR can be used directly in a ggplot2() call)
xDense <- ggplot(RPPdata, aes(x=T.r.O, fill=oriSig)) +
geom_density(aes(y= ..count..),trim=F,alpha=.5) +
xlab("") + ylab("") + xlim(0,1) +
gg.theme("noax") +
theme(legend.position = "none",plot.margin = unit(c(0,0,0,4), "lines"))
# Y margin density plot (note: gg.theme() from C-3PR can be used directly in a ggplot2() call)
yDense <- ggplot(RPPdata, aes(x=T.r.R, fill=repSig)) +
geom_density(aes(y= ..count..),trim=F,alpha=.5) +
xlab("") + ylab("") + xlim(-.5,1) +
coord_flip() +
gg.theme("noax") +
theme(legend.position = "none", plot.margin = unit(c(0,0,3,0), "lines"))
notInCI <- with(RPPdata, T.r.R < lowerCI | T.r.R > upperCI)
RPPdata$notInCI <- notInCI
# CONFIDENCE INTERVAL TABLE ----------------------------
CIdata <- with(RPPdata, data.frame(Local_ID = Local.ID,
Lower_CI = lowerCI,
original_study = T.r.O,
conditional_estimate = correctedScor,
replication_study = T.r.R,
Upper_CI = upperCI,
Stat_Type = T.Test.Statistic.O,
DF1_original = T.df1.O,
DF2_original = T.df2.O,
original_pvalues = T.pval.recalc.O))
write.csv(CIdata, file = "estimates and confidence intervals.csv")
# ORIGINAL FIGURE WITH CONFIDENCE INTERFVALS ----------------------
forPlot <- RPPdata[RPPdata$T.pval.recalc.O <= 0.05, ]
excludeF <- which(forPlot$T.Test.Statistic.O != "F" & forPlot$T.df2.O >1)
forPlot <- forPlot[-excludeF,]
scatterP<-
ggplot(forPlot, aes(x=T.r.O,y=T.r.R)) +
geom_hline(aes(yintercept=0),linetype=2) +
geom_abline(intercept=0,slope=1,color="Grey60")+
geom_point(aes(size=Power.Rn,fill=repSig),color="Grey30",shape=21,alpha=.8) +
geom_rug(aes(color=oriSig),size=1,sides="b",alpha=.6) +
geom_rug(aes(color=repSig),size=1,sides="l",alpha=.6) +
scale_x_continuous(name="Original Effect Size",limits=c(0,1),breaks=c(0,.25,.5,.75,1)) +
scale_y_continuous(name="Replication Effect Size",limits=c(-.5,1),breaks=c(-.5,-.25,0,.25,.5,.75,1)) +
ggtitle("") + xlab("") + ylab("") +
scale_size_continuous(name="Replication Power",range=c(2,9)) +
scale_color_discrete(name="p-value") +
scale_fill_discrete(name="p-value") +
gg.theme("clean") +
theme(legend.position=c(.915,.6), plot.margin = unit(c(-2,-1.5,2.5,2), "lines")) +
geom_segment(aes(xend = T.r.O,
y = lowerCI, yend = upperCI, col = repSig), linetype = 1, alpha = 0.8, size = 0.5) +
geom_curve(aes(x = T.r.O - 0.0075, xend = T.r.O + 0.0075,
y = lowerCI, yend = lowerCI, col = repSig), curvature = 0.5, linetype = 1, size = 1.2) +
geom_curve(aes(x = T.r.O - 0.0075, xend = T.r.O + 0.0075,
y = upperCI, yend = upperCI, col = repSig), curvature = -0.5, linetype = 1, size = 1.2) +
geom_point(aes(y = correctedScor), col = "black", size = 5, shape = 19) +
geom_point(data = subset(forPlot, notInCI), aes(size = Power.Rn), shape = 21, col = "black", fill = "black", alpha = 0.3)
# Yet another way to organise plots: grid.arrange() from the gridExtra package.
grid.arrange(xDense, blankPlot, scatterP, yDense, ncol=2, nrow=2, widths=c(4, 1.4), heights=c(1.4, 4))
# SAVE combined plots as PDF
pdf("RPP_Figure3_original_5_noF.pdf",pagecentre=T, width=15*1.15,height=12*1.15 ,paper = "special")
gridExtra::grid.arrange(xDense, blankPlot, scatterP, yDense, ncol=2, nrow=2, widths=c(4, 1), heights=c(1, 4))
dev.off()
# ORIGINAL - SIZE BY P-VALUES ------------------------
RPPdata$pvalue_quantile <- order(-RPPdata$T.pval.recalc.O)/length(RPPdata$T.pval.recalc.O)
forPlot <- RPPdata[RPPdata$T.pval.recalc.O <= 0.05, ]
excludeF <- which(forPlot$T.Test.Statistic.O != "F" & forPlot$T.df2.O >1)
forPlot <- forPlot[-excludeF,]
scatterP<-
ggplot(forPlot, aes(x=T.r.O, y=T.r.R)) +
geom_hline(aes(yintercept=0),linetype=2) +
geom_abline(intercept=0,slope=1,color="Grey60")+
geom_point(aes(size=pvalue_quantile,fill=repSig),color="Grey30",shape=21,alpha=.8) +
geom_rug(aes(color=oriSig),size=1,sides="b",alpha=.6) +
geom_rug(aes(color=repSig),size=1,sides="l",alpha=.6) +
scale_x_continuous(name="Original Effect Size",limits=c(0,1),breaks=c(0,.25,.5,.75,1)) +
scale_y_continuous(name="Replication Effect Size",limits=c(-.5,1),breaks=c(-.5,-.25,0,.25,.5,.75,1)) +
ggtitle("") + xlab("") + ylab("") +
scale_size_continuous(name="p-value quantile",range=c(2,9)) +
scale_color_discrete(name="Replication p-value") +
scale_fill_discrete(name="Replication p-value") +
gg.theme("clean") +
theme(legend.position=c(.915,.6), plot.margin = unit(c(-2,-1.5,2.5,2), "lines")) +
geom_segment(aes(xend = T.r.O,
y = lowerCI, yend = upperCI, col = repSig), linetype = 1, alpha = 0.8, size = 0.5) +
geom_curve(aes(x = T.r.O - 0.0075, xend = T.r.O + 0.0075,
y = lowerCI, yend = lowerCI, col = repSig), curvature = 0.5, linetype = 1, size = 1.2) +
geom_curve(aes(x = T.r.O - 0.0075, xend = T.r.O + 0.0075,
y = upperCI, yend = upperCI, col = repSig), curvature = -0.5, linetype = 1, size = 1.2) +
geom_point(aes(y = correctedScor), col = "black", size = 5, shape = 19) +
geom_point(data = subset(forPlot, notInCI), aes(size = pvalue_quantile), shape = 21, col = "black", fill = "black", alpha = 0.3)
grid.arrange(xDense, blankPlot, scatterP, yDense, ncol=2, nrow=2, widths=c(4, 1.4), heights=c(1.4, 4))
pdf("RPP_Figure3_pvaluesize_noF.pdf",pagecentre=T, width=15*1.15,height=12*1.15 ,paper = "special")
gridExtra::grid.arrange(xDense, blankPlot, scatterP, yDense, ncol=2, nrow=2, widths=c(4, 1), heights=c(1, 4))
dev.off()
# STAT_CORRECTION Corrected
# The main scatterplot (note: gg.theme() from C-3PR can be used directly in a ggplot2() call)
xDense <- ggplot(RPPdata, aes(x=correctedScor, fill=oriSig)) +
geom_density(aes(y= ..count..),trim=F,alpha=.5) +
xlab("") + ylab("") + xlim(0,1) +
gg.theme("noax") +
theme(legend.position = "none",plot.margin = unit(c(0,0,0,4), "lines"))
# Y margin density plot (note: gg.theme() from C-3PR can be used directly in a ggplot2() call)
yDense <- ggplot(RPPdata, aes(x=T.r.R, fill=repSig)) +
geom_density(aes(y= ..count..),trim=F,alpha=.5) +
xlab("") + ylab("") + xlim(-.5,1) +
coord_flip() +
gg.theme("noax") +
theme(legend.position = "none", plot.margin = unit(c(0,0,3,0), "lines"))
notInCI <- with(RPPdata, T.r.R < lowerCI | T.r.R > upperCI)
# FIGURE 3 WITH CONDITIONAL ESTIMATES --------------------------
forPlot <- RPPdata[RPPdata$T.pval.recalc.O <= 0.05, ]
excludeF <- which(forPlot$T.Test.Statistic.O != "F" & forPlot$T.df2.O >1)
forPlot <- forPlot[-excludeF,]
scatterP<-
ggplot(RPPdata,aes(x=correctedScor, y=T.r.R)) +
geom_hline(aes(yintercept=0),linetype=2) +
geom_abline(intercept=0,slope=1,color="Grey60")+
geom_point(aes(size=correctedPower,fill=repSig),color="Grey30",shape=21,alpha=.8) +
geom_rug(aes(color=oriSig),size=1,sides="b",alpha=.6) +
geom_rug(aes(color=repSig),size=1,sides="l",alpha=.6) +
scale_x_continuous(name="Original Effect Size",limits=c(0,1),breaks=c(0,.25,.5,.75,1)) +
scale_y_continuous(name="Replication Effect Size",limits=c(-.5,1),breaks=c(-.5,-.25,0,.25,.5,.75,1)) +
ggtitle("") + xlab("") + ylab("") +
scale_size_continuous(name="Replication Power",range=c(0,10)) +
scale_color_discrete(name="p-value") +
scale_fill_discrete(name="p-value") +
gg.theme("clean") +
theme(legend.position=c(.915,.6), plot.margin = unit(c(-2,-1.5,2.5,2), "lines")) +
geom_segment(aes(xend = correctedScor,
y = lowerCI, yend = upperCI, col = repSig), linetype = 1, alpha = 0.8, size = 0.5) +
geom_curve(aes(x = correctedScor - 0.0075, xend = correctedScor + 0.0075,
y = lowerCI, yend = lowerCI, col = repSig), curvature = 0.5, linetype = 1, size = 1.2) +
geom_curve(aes(x = correctedScor - 0.0075, xend = correctedScor + 0.0075,
y = upperCI, yend = upperCI, col = repSig), curvature = -0.5, linetype = 1, size = 1.2) +
geom_point(data = subset(RPPdata, notInCI), aes(size = correctedPower), shape = 21, col = "black", fill = "black", alpha = 0.3)
# Yet another way to organise plots: grid.arrange() from the gridExtra package.
gridExtra::grid.arrange(xDense, blankPlot, scatterP, yDense, ncol=2, nrow=2, widths=c(4, 1.4), heights=c(1.4, 4))
# SAVE combined plots as PDF
pdf("RPP_Figure3_stat_5_noF.pdf",pagecentre=T, width=15*1.15,height=12*1.15 ,paper = "special")
gridExtra::grid.arrange(xDense, blankPlot, scatterP, yDense, ncol=2, nrow=2, widths=c(4, 1), heights=c(1, 4))
dev.off()
ggplot(RPPdata) +
geom_density(aes(x = correctedPower, fill = repSig), alpha = 0.5)
ggplot(RPPdata) +
geom_density(aes(x = Power.Rn, fill = repSig), alpha = 0.5)
plot(x = correctedPower, y = RPPdata$repSig == "Significant")
lines(lowess(x = correctedPower, y = RPPdata$repSig == "Significant"),type = "l")
abline(a = 0, b = 1)
# SMOOOOOTHING --------------------------------------------
forPlot <- RPPdata
forPlot <- forPlot[forPlot$T.pval.recalc.O <= 0.05, ]
forPlot <- forPlot[-excludeF, ]
ggplot(forPlot) +
geom_smooth(aes(x = T.r.O, y = T.r.R), col = "blue") +
geom_point(aes(x = T.r.O, y = T.r.R), col = "blue") +
geom_smooth(aes(x = correctedScor, y = T.r.R), col = "red") +
geom_point(aes(x = correctedScor, y = T.r.R), col = "red") +
geom_abline(intercept = 0, slope = 1) +
theme_bw() +
xlab("Original Effect Size Estimate") +
ylab("Replicated Effect Size Estimate") +
geom_segment(aes(y = T.r.R, yend = T.r.R, x = T.r.O, xend = correctedScor),
col = "black", linetype = 1, alpha = 0.5, arrow = arrow(length = unit(0.3, "cm")))
# Figure Plot by Mean Estimated Signal Size --------------------
ncp <- unlist(sapply(conditionalList, function(x) if(length(x) > 0) x[2]))
ncp[statType == "t"] <- ncp[statType == "t"]^2
conditional.ncp <- unlist(sapply(conditionalList, function(x) if(length(x) > 0) x[4]))^2
ncpCI <- matrix(nrow = length(ncp), ncol = 2)
for(i in 1:length(ncp)) {
if(length(conditionalList[[i]]) > 1)
ncpCI[i, ] <- conditionalList[[i]][[6]]
}
CIsignSwitch <- which(sign(ncpCI[, 1]) != sign(ncpCI[, 2]))
ncpCI <- pmax(ncpCI, min(ncpCI[ncpCI > 0], na.rm = TRUE) / 10)
ncpCI <- ncpCI ^ 2
# Computing replicated NCP
R.statType <- RPPdata$T.Test.Statistic.R
R.ncp <- ncp
for(i in 1:length(ncp)) {
if(R.statType[i] == "t") {
df <- RPPdata$T.df2.R[i]
if(df > 90) df <- 90
R.ncp[i] <- t.to.ncp(RPPdata$T.Test.value.R[i], df)^2
} else if(R.statType[i] == "F") {
R.ncp[i] <- with(RPPdata, Fval.to.ncp(T.Test.value.R[i], T.df1.R[i], T.df2.R[i]))
} else {
R.ncp[i] <- with(RPPdata, chisq.to.ncp(T.Test.value.R[i], T.df1.R[i]))
}
}
# Transforming to log scale
ncp <- log(ncp)
R.ncp <- pmax(R.ncp, min(R.ncp[R.ncp != 0] / 10, na.rm = TRUE))
R.ncp <- log(R.ncp)
conditional.ncp <- log(conditional.ncp)
ncpCI <- log(ncpCI)
# Fixing replicated signs
signSwitch <- with(RPPdata, which(sign(T.r.O) != sign(T.r.R)))
R.ncp[signSwitch] <- -R.ncp[signSwitch]
forNCPplot <- data.frame(ncp = ncp, conditinoalNCP = conditional.ncp,
lowerCI = ncpCI[, 1], upperCI = ncpCI[, 2],
repNCP = R.ncp,
condPower = RPPdata$correctedPower,
repSig = RPPdata$repSig)
ggplot(forNCPplot) +
geom_point(aes(x = conditional.ncp, y = R.ncp, fill = factor(repSig), size = correctedPower), color="Grey30",shape=21,alpha=.8) +
theme_bw() +
geom_abline(intercept = 0, slope = 1)
# Same calculations but with 0.01 threshold ---------------------------
source("t-correction.R")
source("F-correction.R")
source("chisq-correction.R")
# Performing conditional inference on test statistics
naIndex <- !is.na(RPPdata$T.r.O)
statistics <- RPPdata$T.Test.value.O
statType <- RPPdata$T.Test.Statistic.O
conditionalList <- lapply(1:length(statType), function(x) x)
i <- 1
while(i <= length(conditionalList)) {
if((is.na(RPPdata$T.pval.recalc.O[i]) |
!naIndex[i]) &
i != 80) {
i <- i + 1
next
}
if(i == 80) {
alpha.threshold <- 0.05
} else if(RPPdata$T.pval.recalc.O[i] >= 0.06) {
alpha.threshold <- 1
} else if(RPPdata$T.pval.recalc.O[i] >= 0.05) {
alpha.threshold <- 0.06
} else if(RPPdata$T.pval.recalc.O[i] >= 0.005) {
alpha.threshold <- 0.05
} else {
alpha.threshold <- 0.005
}
if(any(statType[i] %in% c("z", "t", "r"))) {
if(statType[i] == "t") {
df <- RPPdata$T.df2.O[i]
t <- statistics[i]
if(df > 90) df <- 100
} else if(statType[i] == "z") {
df <- Inf
t <- statistics[i]
} else if(statType[i] == "r") {
df <- Inf
r <- statistics[i]
t <- 0.5 * log((1 + r) / (1 - r))
t <- t / sqrt(RPPdata$T.N.O[i] - 3)^-1
}
conditionalList[[i]] <- t.selection.correction(t, df, alpha.threshold, 0.05)
} else if(any(statType[i] %in% c("Chi", "Chi2"))) {
df <- RPPdata$T.df1.O[i]
chi <- statistics[i]
conditionalList[[i]] <- chisq.selection.correction(chi, df, alpha.threshold, 0.05)
} else if(statType[i] == "F") {
df1 <- RPPdata$T.df1.O[i]
df2 <- RPPdata$T.df2.O[i]
Fval <- statistics[i]
if(df1 == 1) {
statType[i] <- "t"
statistics[i] <- sqrt(statistics[i])
next
}
conditionalList[[i]] <- Fdist.selection.correction(Fval, df1, df2, alpha.threshold, 0.05)
}
print(i)
i <- i + 1
}
# Converting conditional test statistics to correlations
F2cor <- function(Fval, df1, df2) {
sqrt((Fval * (df1 / df2)) / (((Fval * df1) / df2) + 1)) * sqrt(1 / df1)
}
correctedCor <- rep(NA, length(statistics))
lowerCI <- rep(NA, length(statistics))
upperCI <- rep(NA, length(statistics))
N <- RPPdata$T.N.O
for(i in 1:length(correctedCor)) {
if(!naIndex[i]) next
if(is.na(RPPdata$T.pval.recalc.O[i]) & i != 80) next
conditional <- conditionalList[[i]][[3]]
lower <- conditionalList[[i]][[5]][1]
upper <- conditionalList[[i]][[5]][2]
if(any(statType[i] %in% c("z", "r"))) {
n <- N[i]
z <- conditional * sqrt(n - 3)^-1
correctedCor[i] <- z2cor(z, n)
lowerCI[i] <- z2cor(lower, n)
upperCI[i] <- z2cor(upper, n)
}
if(any(statType[i] %in% c("Chi", "Chi2"))) {
n <- N[i]
correctedCor[i] <- sqrt(conditional / n)
lowerCI[i] <- sqrt(lower / n)
upperCI[i] <- min(sqrt(upper / n), 1)
}
if(statType[[i]] == "t") {
conditional <- conditional^2
df1 <- 1
df2 <- RPPdata$T.df2.O[i]
signLower <- sign(lower)
lower <- lower^2
upper <- upper^2
correctedCor[i] <- F2cor(conditional, df1, df2)
lowerCI[i] <- F2cor(lower, df1, df2) * signLower
upperCI[i] <- F2cor(upper, df1, df2)
}
if(statType[i] == "F") {
df1 <- RPPdata$T.df1.O[i]
df2 <- RPPdata$T.df2.O[i]
correctedCor[i] <- F2cor(conditional, df1, df2)
lowerCI[i] <- F2cor(lower, df1, df2) * signLower
upperCI[i] <- F2cor(upper, df1, df2)
}
}
# non-significant corrected equals original non-significant
originalPvals <- RPPdata$T.pval.recalc.O
originalPvals[is.na(originalPvals)] <- 1
correctedCor[originalPvals >= 0.06] <- RPPdata$T.r.O[originalPvals >= 0.06]
RPPdata$correctedScor <- correctedCor
# Recalculating power
correctedPower <- rep(NA, length(correctedCor))
for(i in 1:length(correctedCor)) {
if(length(conditionalList[[i]]) == 1) next
if(class(conditionalList[[i]]) == "t.correction") {
power <- calculate.power(conditionalList[[i]], newDF = RPPdata$T.N.R[i] - 1)[[2]]
} else if(class(conditionalList[[i]]) == "chisq.correction") {
power <- calculate.power(conditionalList[[i]],
originalN = RPPdata$T.N.O[i],
newN = RPPdata$T.N.R[i])[[2]]
} else if(class(conditionalList[[i]]) == "Fval.correction") {
power <- calculate.power(conditionalList[[i]],
newN = RPPdata$T.N.R[i])[[2]]
}
correctedPower[i] <- power
}
correctedPower[is.na(correctedPower)] <- RPPdata$Power.Rn[is.na(correctedPower)]
RPPdata$correctedPower <- correctedPower
RPPdata$lowerCI <- lowerCI
RPPdata$upperCI <- upperCI
xDense <- ggplot(RPPdata, aes(x=correctedScor, fill=oriSig)) +
geom_density(aes(y= ..count..),trim=F,alpha=.5) +
xlab("") + ylab("") + xlim(0,1) +
gg.theme("noax") +
theme(legend.position = "none",plot.margin = unit(c(0,0,0,4), "lines"))
# Y margin density plot (note: gg.theme() from C-3PR can be used directly in a ggplot2() call)
yDense <- ggplot(RPPdata, aes(x=T.r.R, fill=repSig)) +
geom_density(aes(y= ..count..),trim=F,alpha=.5) +
xlab("") + ylab("") + xlim(-.5,1) +
coord_flip() +
gg.theme("noax") +
theme(legend.position = "none", plot.margin = unit(c(0,0,3,0), "lines"))
# STAT_CORRECTION ORIGINAL
notInCI <- with(RPPdata, T.r.R < lowerCI | T.r.R > upperCI)
RPPdata$notInCI <- notInCI
RPPdata$onePercentSig <- RPPdata$T.pval.recalc.O <= 0.005
forPlot <- RPPdata[RPPdata$onePercentSig, ]
# The main scatterplot (note: gg.theme() from C-3PR can be used directly in a ggplot2() call)
xDense <- ggplot(forPlot, aes(x=T.r.O, fill=oriSig)) +
geom_density(aes(y= ..count..),trim=F,alpha=.5) +
xlab("") + ylab("") + xlim(0,1) +
gg.theme("noax") +
theme(legend.position = "none",plot.margin = unit(c(0,0,0,4), "lines"))
# Y margin density plot (note: gg.theme() from C-3PR can be used directly in a ggplot2() call)
yDense <- ggplot(forPlot, aes(x=T.r.R, fill=repSig)) +
geom_density(aes(y= ..count..),trim=F,alpha=.5) +
xlab("") + ylab("") + xlim(-.5,1) +
coord_flip() +
gg.theme("noax") +
theme(legend.position = "none", plot.margin = unit(c(0,0,3,0), "lines"))
excludeF <- which(forPlot$T.Test.Statistic.O != "F" & forPlot$T.df2.O >1)
forPlot <- forPlot[-excludeF,]
scatterP<-
ggplot(forPlot, aes(x=T.r.O,y=T.r.R)) +
geom_hline(aes(yintercept=0),linetype=2) +
geom_abline(intercept=0,slope=1,color="Grey60")+
geom_point(aes(size=Power.Rn,fill=repSig),color="Grey30",shape=21,alpha=.8) +
geom_rug(aes(color=oriSig),size=1,sides="b",alpha=.6) +
geom_rug(aes(color=repSig),size=1,sides="l",alpha=.6) +
scale_x_continuous(name="Original Effect Size",limits=c(0,1),breaks=c(0,.25,.5,.75,1)) +
scale_y_continuous(name="Replication Effect Size",limits=c(-.5,1),breaks=c(-.5,-.25,0,.25,.5,.75,1)) +
ggtitle("") + xlab("") + ylab("") +
scale_size_continuous(name="Replication Power",range=c(2,9)) +
scale_color_discrete(name="p-value") +
scale_fill_discrete(name="p-value") +
gg.theme("clean") +
theme(legend.position=c(.915,.6), plot.margin = unit(c(-2,-1.5,2.5,2), "lines")) +
geom_segment(aes(xend = T.r.O,
y = lowerCI, yend = upperCI, col = repSig), linetype = 1, alpha = 0.8, size = 0.5) +
geom_curve(aes(x = T.r.O - 0.0075, xend = T.r.O + 0.0075,
y = lowerCI, yend = lowerCI, col = repSig), curvature = 0.5, linetype = 1, size = 1.2) +
geom_curve(aes(x = T.r.O - 0.0075, xend = T.r.O + 0.0075,
y = upperCI, yend = upperCI, col = repSig), curvature = -0.5, linetype = 1, size = 1.2) +
geom_point(aes(y = correctedScor), col = "black", size = 5, shape = 19) +
geom_point(data = subset(forPlot, notInCI), aes(size = Power.Rn), shape = 21, col = "black", fill = "black", alpha = 0.3)
# Yet another way to organise plots: grid.arrange() from the gridExtra package.
grid.arrange(xDense, blankPlot, scatterP, yDense, ncol=2, nrow=2, widths=c(4, 1.4), heights=c(1.4, 4))
# SAVE combined plots as PDF
pdf("RPP_Figure3_original_verySig_005_noF.pdf",pagecentre=T, width=15*1.15,height=12*1.15 ,paper = "special")
gridExtra::grid.arrange(xDense, blankPlot, scatterP, yDense, ncol=2, nrow=2, widths=c(4, 1), heights=c(1, 4))
dev.off()
# STAT_CORRECTION Corrected
# The main scatterplot (note: gg.theme() from C-3PR can be used directly in a ggplot2() call)
xDense <- ggplot(RPPdata, aes(x=correctedScor, fill=oriSig)) +
geom_density(aes(y= ..count..),trim=F,alpha=.5) +
xlab("") + ylab("") + xlim(0,1) +
gg.theme("noax") +
theme(legend.position = "none",plot.margin = unit(c(0,0,0,4), "lines"))
# Y margin density plot (note: gg.theme() from C-3PR can be used directly in a ggplot2() call)
yDense <- ggplot(RPPdata, aes(x=T.r.R, fill=repSig)) +
geom_density(aes(y= ..count..),trim=F,alpha=.5) +
xlab("") + ylab("") + xlim(-.5,1) +
coord_flip() +
gg.theme("noax") +
theme(legend.position = "none", plot.margin = unit(c(0,0,3,0), "lines"))
scatterP<-
ggplot(RPPdata,aes(x=correctedScor, y=T.r.R)) +
geom_hline(aes(yintercept=0),linetype=2) +
geom_abline(intercept=0,slope=1,color="Grey60")+
geom_point(aes(size=correctedPower,fill=repSig),color="Grey30",shape=21,alpha=.8) +
geom_rug(aes(color=oriSig),size=1,sides="b",alpha=.6) +
geom_rug(aes(color=repSig),size=1,sides="l",alpha=.6) +
scale_x_continuous(name="Original Effect Size",limits=c(0,1),breaks=c(0,.25,.5,.75,1)) +
scale_y_continuous(name="Replication Effect Size",limits=c(-.5,1),breaks=c(-.5,-.25,0,.25,.5,.75,1)) +
ggtitle("") + xlab("") + ylab("") +
scale_size_continuous(name="Replication Power",range=c(0,10)) +
scale_color_discrete(name="p-value") +
scale_fill_discrete(name="p-value") +
gg.theme("clean") +
theme(legend.position=c(.915,.6), plot.margin = unit(c(-2,-1.5,2.5,2), "lines")) +
geom_segment(aes(xend = correctedScor,
y = lowerCI, yend = upperCI, col = repSig), linetype = 1, alpha = 1, size = 1.1) +
geom_curve(aes(x = correctedScor - 0.0075, xend = correctedScor + 0.0075,
y = lowerCI, yend = lowerCI, col = repSig), curvature = 0.5, linetype = 1) +
geom_curve(aes(x = correctedScor - 0.0075, xend = correctedScor + 0.0075,
y = upperCI, yend = upperCI, col = repSig), curvature = - 0.5, linetype = 1) +
geom_point(data = subset(RPPdata, notInCI), aes(size = correctedPower), shape = 21, col = "black", fill = "black", alpha = 0.3)
# Yet another way to organise plots: grid.arrange() from the gridExtra package.
gridExtra::grid.arrange(xDense, blankPlot, scatterP, yDense, ncol=2, nrow=2, widths=c(4, 1.4), heights=c(1.4, 4))
# SAVE combined plots as PDF
pdf("RPP_Figure3_stat_verySig_1.pdf",pagecentre=T, width=15*1.15,height=12*1.15 ,paper = "special")
gridExtra::grid.arrange(xDense, blankPlot, scatterP, yDense, ncol=2, nrow=2, widths=c(4, 1), heights=c(1, 4))
dev.off()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.