dataAnalysis/jsScience.R

convertToZ <- function(t, type, df1, df2) {
  z <- numeric(length(t))
  for(i in 1:length(t)) {
    print(i)
    if(type[i] == "F") {
      z[i] <- qnorm(pf(t[i], df1[i], df2[i]))
    } else if(type[i] == "t") {
      z[i] <- qnorm(pt(t[i], df1[i]))
    } else if(type[i] %in% c("Chi2", "Chi")) {
      z[i] <- qnorm(pchisq(t[i], df1[i]))
    } else if(type[i] == "z") {
      z[i] <- t[i]
    } else if(type[i] == "r") {
      z[i] <- atan(t[i]) * sqrt(df1[1] - 3)
    }
    print(z[i])
    a <- 0
  }

  return(z)
}

# ############################################################
# #        RPP Manuscript Figure 1,2,3                       #
# #                                                          #
# # Created by Fred Hasselman on behalf of the RPP intiative #
# ############################################################
#
# # All the custom functions that are required to recreate the Figures are available in a GitHub sourceable file: `C-3PR.R`
# # It is available here: https://github.com/FredHasselman/toolboxR
#
# ############################################################
# # C-3PR: Evaluating Scientific *C*laims: *P*ost *P*ublication *P*eer *R*eview, *R*eplication and *R*eproduction.
# #
# # An R function library created in the contex of analysing and visualising data generated by several Open Science intiatives.
# #
# # C-3PR Created by [Fred Hasselman](https://osf.io/ujgs6/) on behalf of the ManyLabs1, ManyLabs2, RPP and Curate Science projects
# ############################################################
#
#
# # SETUP local R-------------------------------------------------------------------
#
# # Use this code (from the devtools package) to source C-3PR directly from GitHub:
# require(devtools)
# source_url('https://raw.githubusercontent.com/FredHasselman/toolboxR/master/C-3PR.R')
#
# # This will load and (if necessary) install libraries frequently used for data management and plotting
# in.IT(c('ggplot2','RColorBrewer','lattice','gridExtra','plyr','dplyr','httr'))
#
# # Read the data from the OSF storage
# # Note: get.OSFfile() returns a list with the .csv data (df) and information (info) containing the URL download timestamp and original column and rownames (these names will be changed if dfCln=TRUE).
# RPPdata <- get.OSFfile(code='https://osf.io/fgjvw/',dfCln=T)$df
#
# ## If you dowloaded the csv file to your harddrive use this code:
# RPPdata <- read.csv('data/rpp_data.csv', stringsAsFactors = FALSE )
# RPPdata <- df.Clean(RPPdata)
# RPPdata <- RPPdata$df
#
# saveRDS(RPPdata, file = "data/rppclean.rds")
#

# Loading data ------------------------
RPPdata <- readRDS(file = "dataAnalysis/rppclean.rds")

sortPvals <- sort(RPPdata$T.pval.recalc.O)
sum(sortPvals[1:(length(sortPvals)-1)] == sortPvals[2:length(sortPvals)])
which(sortPvals[1:(length(sortPvals)-1)] == sortPvals[2:length(sortPvals)])

# Select the completed replication studies
RPPdata <- dplyr::filter(RPPdata, !is.na(T.pval.USE.O), !is.na(T.pval.USE.R))

# We have 99 studies for which p-values and effect sizes could be calculated
nrow(RPPdata)
# We have 97 studies for which p-values of the original effect were significantly below .05
idOK <- complete.cases(RPPdata$T.r.O,RPPdata$T.r.R)
sum(idOK)

# Starting Analysis -------------------
RPPdata$Test.statistic.O
RPPdata$T.Test.value.O
stats <- RPPdata$T.Test.value.O
statType <- RPPdata$T.Test.Statistic.O
df1 <- RPPdata$T.df1.O
df2 <- RPPdata$T.df2.O
df1[statType == "r"] <- RPPdata$T.N.O[statType == "r"]

originalZ <- qnorm(1 - pmax(RPPdata$T.pval.recalc.O, 10^-15) / 2)
repZ <- qnorm(1 - pmax(RPPdata$T.pval.recalc.R, 10^-15) / 2) * (1 - 2 * (RPPdata$Direction.R == "opposite"))
whichNA <- which(is.na(originalZ))
originalZ <- originalZ[-whichNA]
repZ <- repZ[-whichNA]
# saveRDS(data.frame(orig = originalZ, rep = repZ), file = "dataAnalysis/sciDat.rds")

fit <- censoredJS(originalZ, M = NULL, lower = qnorm(0.025), upper = qnorm(1 - 0.025),
                  iterations = 100)
predict(fit)

par(mfrow = c(1, 1), mar = rep(4, 4))
adjusted <- predict(fit)
plot(originalZ, adjusted)
points(originalZ, repZ, col = "blue")
abline(a = 0, b = 1)
plot(repZ, adjusted)
abline(a = 0, b = 1)

mean(originalZ - repZ, na.rm = TRUE)
mean(adjusted - repZ, na.rm = TRUE)

sqrt(mean((originalZ - repZ)^2, na.rm = TRUE))
sqrt(mean((adjusted - repZ)^2, na.rm = TRUE))

# pdf("figures/scienceStein.pdf",pagecentre=T, width=8,height=3.5 ,paper = "special")
par(mfrow = c(1, 2), mar = rep(4, 4, 2, 2))
plot(density(originalZ), ylim = c(0, 0.6), xlab = "", ylab = "Observed Density", col = "red",
     xlim = c(-3, 9), main = "")
lines(density(repZ[!is.na(repZ)]), col = "black")
lines(density(adjusted[!is.na(adjusted)]), col = "blue")

plot(density(originalZ[!is.na(repZ)] - repZ[!is.na(repZ)]), col = "red", ylab = "Error Density",
     xlab = "", main = "")
lines(density(adjusted[!is.na(repZ)] - repZ[!is.na(repZ)]), col = "blue")
abline(v = 0)
# dev.off()
ammeir2/selectiveTweedy documentation built on May 24, 2019, 3:02 a.m.