# R/pcurve_corr.R: defines pcurve_cor()

#R Code running behind p-curve.com/App3
#Written by Uri Simonsohn (uws@wharton.upenn.edu)
#This version: 2015 03 02
#
#   <-- PLEASE CONTACT ME DIRECTLY IF YOU SEE ANY ERRORS OR HAVE SUGGESTIONS  -->

# Note: 
#The code below has some redundancies because it merges code written for two different papers.
#Some of the clunkiness (e.g., how results are reported) arises because the code
#was written to run on a server and save results that are then presented on a website.
#It was adapted to run on a personal computer with as few changes to the code as possible.
#

############################################################################################################

#                R E A D     M E               #
#  (---------(0)------------(0)---------------)

#ENTER/LOAD YOUR TEST RESULTS HERE: (USE THIS SYNTAX)
# tests = c("t(88)=2.1",
#           "r(147)=.246",
#           "F(1,100)=9.1",
#           "f(2,210)=4.45",
#           "Z=3.45",
#           "chi2(1)=9.1",
#           "r(77)=.47",
#           "chi2(2)=8.74")

#tests = paste("r(",dat$ni,")=",dat$ri, sep = "")
#RUN ALL CODE UP TO #14 TO GET MAIN RESULTS, P-CURVE CHART, AND POWER CHART
#THEN RUN CODE #15 TO GET THE CUMULATIVE P-CURVE (DROPPING STUDIES)




pcurve_cor <- function(ni, 
                       ri, 
                       showText = TRUE, 
                       showPlot = TRUE) {
  

#Build the test strings in p-curve syntax; note that the first argument, ni, is inserted
#inside r( ), where the downstream code treats it as the degrees of freedom
tests = paste("r(",ni,")=",ri, sep = "")
  
############################################################################################################

#Load libraries necessary to run the program
library(stringr)  #Library to process string variables (the text of the entered tests)
library(poibin)   #This library has the Poisson binomial distribution: the distribution of a sum of
#independent Bernoulli trials with different underlying probabilities. It is used for the binomial
#test, given that each test has a (slightly) different probability of p<.025, depending on its own
#non-central parameter.
#See Hong (2013), "On computing the distribution function for the Poisson binomial distribution,"
#Computational Statistics and Data Analysis, 59, 41-51 - http://dx.doi.org/10.1016/j.csda.2012.10.006 



#1. create empty vectors for
#1.1 pp-values
t.ppr=f.ppr=c.ppr=z.ppr=c();      	#right skew
t.ppl=f.ppl=c.ppl=z.ppl=c();       	#left
t.pp33=f.pp33=c.pp33=z.pp33=c();   	#33%

#1.2 proportions expected to be low (p<.025) for each test
t.plow=f.plow=c.plow=z.plow=c()

#1.3 initialize all proportions to 0 so that if a type of test is missing it contributes nothing when aggregating
t.prop1=t.prop2=t.prop3=t.prop4=t.prop5=0;
f.prop1=f.prop2=f.prop3=f.prop4=f.prop5=0;
z.prop1=z.prop2=z.prop3=z.prop4=z.prop5=0;
c.prop1=c.prop2=c.prop3=c.prop4=c.prop5=0;


#1.4 Function 1 - functions that find the noncentrality parameter of the t, F, and chi2 distributions that gives 33% power for given d.f.

#t-test
ncp33t =function(df) 
{      
  xc=qt(p=1-.05/2, df=df)
  #Find the noncentrality parameter (ncp) that leads to 33% power to obtain xc
  f = function(delta, x, df) pt(x, df = df, ncp = delta) - 2/3
  out = uniroot(f, c(0, 37.62), x = xc, df = df)  
  return(out$root) 
}

#F-test
ncp33f =function(df1,df2) 
{      
  xc=qf(p=1-.05,df1=df1,df2=df2)
  f = function(delta, x, df1, df2) pf(x, df1 = df1, df2 = df2, ncp = delta) - 2/3
  out = uniroot(f, c(0, 37.62), x = xc, df1 = df1, df2 = df2)  
  return(out$root)       
}

#chi-square
ncp33chi =function(df) 
{      
  xc=qchisq(p=1-.05, df=df)
  #Find the noncentrality parameter (ncp) that leads to 33% power to obtain xc
  f = function(delta, x, df) pchisq(x, df = df, ncp = delta) - 2/3
  out = uniroot(f, c(0, 37.62), x = xc, df = df)  
  return(out$root)  
}
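
#Illustrative check (added; not part of the original app): for a t-test with df=88,
#ncp33t(88) returns the noncentrality parameter delta at which power is 1/3, i.e.
# delta=ncp33t(88)
# 1-pt(qt(p=1-.05/2,df=88),df=88,ncp=delta)   #should be ~0.333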


###############################################################################

#Function 2 - percent() : makes a number look like a percentage
percent <- function(x, digits = 0, format = "f", ...)   {
  paste(formatC(100 * x, format = format, digits = digits, ...), "%", sep = "")
}
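#Example of its output (added): percent(.4567) returns "46%"; percent(.4567, digits=1) returns "45.7%"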
###############################################################################

#Create a "backup" so that final results table includes the n.s. results which will be excluded from most calculations
#(not really important here, key for the online app)
k=seq(from=1,to=length(tests))
backup=cbind(k,tests)

#2 Split tests into t, F, chi2, Z, and r

#2.1 Turn everything to lower case
tests=tolower(tests)

#2.2 Extract the type of test (stat = {t, f, c/x, z, r})
stat=substring(tests,1,1)   

#2.3 Split vector of tests into these
#get the t-tests
t.text=subset(tests,stat=="t")
#get the f-tests
f.text=subset(tests,stat=="f")
#get the chi2
c.text=subset(tests,stat=="c" | stat=="x")
#get the Z (normally distributed)
z.text=subset(tests,stat=="z")
#get the r (correlation) 
r.text=subset(tests,stat=="r")

#3 Get d.f. for the tests 
#3.1 t-test
#find the 2nd parenthesis
t.par=str_locate(t.text,")")[,1]
#Get the d.f. between both parentheses
t.df=as.numeric(substring(t.text,3,t.par -1))

#3.2 f-test
#find the comma
f.comma=str_locate(f.text,",")[,1]
#find the 2nd parenthesis
f.par=str_locate(f.text,")")[,1]
#Get df1 (between "(" and ",")
f.df1=as.numeric(substring(f.text,3,f.comma -1))
#Get df2 (between "," and ")")
f.df2=as.numeric(substring(f.text,f.comma +1,f.par -1))

#3.3 Chi-square  
#find the 1st parenthesis
c.par1=str_locate(c.text,"\\(")[,1]
#find the 2nd parenthesis
c.par2=str_locate(c.text,")")[,1]
#Get the d.f. between both parentheses
c.df=as.numeric(substring(c.text,c.par1+1,c.par2 -1))

#3.4 Correlations
#find the 2nd parenthesis
r.par=str_locate(r.text,"\\)")[,1]
#Get the d.f. between both parentheses
r.df=as.numeric(substring(r.text,3,r.par -1))


#4 Get the test values
#4.1 Find the "=" sign
t.eq=str_locate(t.text,"=")[,1]
f.eq=str_locate(f.text,"=")[,1]
z.eq=str_locate(z.text,"=")[,1]
c.eq=str_locate(c.text,"=")[,1]
r.eq=str_locate(r.text,"=")[,1]  

#4.2 Get the number after the =
t.value=c();r.value=c()    

t.value=as.numeric(substring(t.text,t.eq+1))
f.value=as.numeric(substring(f.text,f.eq+1))
z.value=as.numeric(substring(z.text,z.eq+1))
c.value=as.numeric(substring(c.text,c.eq+1))
r.value=as.numeric(substring(r.text,r.eq+1)) 

#4.3 merge r() with t-tests
rt.value=r.value/(sqrt((1-r.value**2)/r.df))  #r() expressed as a t-value 
t.value=c(t.value,rt.value)                     
t.df=c(t.df,r.df)
t.text=c(t.text,r.text)


#5 Keep significant p-values
#Compute p-values
t.p=2*(1-pt(abs(t.value),df=t.df))
f.p=1-pf(abs(f.value),df1=f.df1,df2=f.df2)
z.p=2*(1-pnorm(abs(z.value)))
c.p=1-pchisq(abs(c.value),df=c.df)

#Subset statistics and d.f.
#ts
t.value.sig=subset(t.value,t.p<.05)
t.df.sig   =subset(t.df,   t.p<.05)
t.text.sig =subset(t.text, t.p<.05)
t.p.sig    =subset(t.p,    t.p<.05)
#fs
f.value.sig=subset(f.value,f.p<.05)
f.df1.sig  =subset(f.df1,  f.p<.05)
f.df2.sig  =subset(f.df2,  f.p<.05)
f.text.sig =subset(f.text, f.p<.05)
f.p.sig    =subset(f.p,    f.p<.05)
#chis
c.value.sig=subset(c.value,c.p<.05)
c.df.sig   =subset(c.df,   c.p<.05)
c.text.sig =subset(c.text, c.p<.05)
c.p.sig    =subset(c.p,    c.p<.05)
#zs
z.value.sig=subset(z.value,z.p<.05)
z.text.sig =subset(z.text, z.p<.05)
z.p.sig    =subset(z.p,    z.p<.05)

#All significant p-values (used for binomial)
all.p.sig=c(t.p.sig, f.p.sig, c.p.sig, z.p.sig)
#Number of significant results
ktot=length(all.p.sig)    
#Number of non-significant results (excluded from p-curve)
kns=length(tests)-ktot    

#6 Compute pp-values 
#6.1 For t-values
if (length(t.value.sig)>0)  #if nonempty compute pp-values
{
  #skew
  t.ppr=t.p.sig*(1/.05)               #pp-value for right-skew 
  t.ppl=1-t.ppr                         #pp-value for left-skew
  #33%power
  #Find the ncp (uses function from top)
  t.ncp33=mapply(ncp33t,t.df.sig)
  #Using the ncp33 compute pp33.
  t.pp33=3*(pt(t.value.sig,  df=t.df.sig,  ncp=t.ncp33)-2/3)
}
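
#Example of the pp-value logic (added for clarity): a significant p=.04 gives
#ppr=.04*(1/.05)=.80 and ppl=1-.80=.20; under the null of no effect p is uniform on
#(0,.05) conditional on significance, so ppr (and hence ppl) is uniform on (0,1)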

#6.2 For F-values
if (length(f.value.sig)>0)  #if nonempty compute pp-values
{
  f.ppr=f.p.sig*(1/.05)             #pp-value for right-skew 
  f.ppl=1-f.ppr                     #pp-value for left-skew
  f.ncp33=mapply(ncp33f, f.df1.sig,  f.df2.sig)
  f.pp33 =3*(pf(f.value.sig,  df1=f.df1.sig, df2=f.df2.sig,  ncp=f.ncp33)-2/3)
}

#6.3 z-values
if (length(z.value.sig)>0)  #if nonempty compute pp-values
{
  z.ppr=z.p.sig*(1/.05)
  z.ppl=1-z.ppr
  z.pp33=3*(pnorm(z.value.sig,mean=1.5285687,sd=1)-2/3)   #Compute pp33-values using the 'ncp' 1.5285687 which gives the normal 33% power
  
}

#6.4 chi-values
if (length(c.value.sig)>0)  #if nonempty compute pp-values
{
  c.ppr=c.p.sig*(1/.05)
  c.ppl=1-c.ppr
  c.ncp33=mapply(ncp33chi, c.df.sig)
  c.pp33=3*(pchisq(c.value.sig,  df=c.df.sig, ncp=c.ncp33)-2/3)
}


#7 STOUFFER: Overall tests aggregating pp-values (Stouffer's method: the pp-values are uniform under their respective nulls, so their probit transforms are standard normal and can be summed)
#7.1 Convert pp-values to Z scores, sum them, and divide by sqrt(ktot)
Zppr =sum(qnorm(c(t.ppr,  f.ppr  ,c.ppr,  z.ppr )))/sqrt(ktot)          #right skew
Zppl =sum(qnorm(c(t.ppl,  f.ppl  ,c.ppl,  z.ppl )))/sqrt(ktot)          #left skew
Zpp33=sum(qnorm(c(t.pp33, f.pp33 ,c.pp33, z.pp33)))/sqrt(ktot)          #33%

#7.2 Compute overall p-values
p.Zppr =pnorm(Zppr)	
p.Zppl =pnorm(Zppl)	
p.Zpp33=pnorm(Zpp33)	
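
#Worked example (added; values approximate): two right-skew pp-values of .10 and .20 give
#Zppr=(qnorm(.10)+qnorm(.20))/sqrt(2) ~ (-1.28-0.84)/1.41 ~ -1.50, so p.Zppr=pnorm(-1.50) ~ .067;
#many small pp-values push Z down and the overall p toward zero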


#8 Green line (Expected p-curve for 33% power)
#8.1 t-tests 
if (length(t.value.sig)>0)  #if nonempty compute pp-values
{
  #Critical values, xc, for p=.05, .04, .03, .02, and .01
  t.x5=qt(.975,df=t.df.sig); t.x4=qt(.98, df=t.df.sig); t.x3=qt(.985,df=t.df.sig); 
  t.x2=qt(.99, df=t.df.sig); t.x1=qt(.995,df=t.df.sig)
  #For Binomial test 
  t.x25=qt(.9875,df=t.df.sig)                            #critical value for t-tests to get p=.025
  t.plow=1- 3*(pt(t.x25,df=t.df.sig, ncp=t.ncp33)-2/3)   #prob(p<.025 | ncp33% & p<.05)
  
  #Probability that the p-value is bigger than .04, .03, .02, and .01, given p<.05 and ncp=ncp33
  t.pp4=3*(pt(t.x4,df=t.df.sig, ncp=t.ncp33)-2/3)
  t.pp3=3*(pt(t.x3,df=t.df.sig, ncp=t.ncp33)-2/3)
  t.pp2=3*(pt(t.x2,df=t.df.sig, ncp=t.ncp33)-2/3)
  t.pp1=3*(pt(t.x1,df=t.df.sig, ncp=t.ncp33)-2/3)
  #within bins proportions
  t.prop5=mean(t.pp4); 
  t.prop4=mean(t.pp3-t.pp4); 
  t.prop3=mean(t.pp2-t.pp3); 
  t.prop2=mean(t.pp1-t.pp2); 
  t.prop1=mean(1-t.pp1)                       
}
#8.2 f-tests 
if (length(f.value.sig)>0)  #if nonempty compute pp-values
{
  #Critical values, xc, for p=.05, .04, .03, .02, and .01
  f.x5=qf(.95,df1=f.df1.sig, df2=f.df2.sig);  f.x4=qf(.96,df1=f.df1.sig, df2=f.df2.sig); f.x3=qf(.97,df1=f.df1.sig, df2=f.df2.sig); 
  f.x2=qf(.98, df1=f.df1.sig, df2=f.df2.sig); f.x1=qf(.99,df1=f.df1.sig, df2=f.df2.sig) 
  #For binomial test
  f.x25 =qf(.975,df1=f.df1.sig, df2=f.df2.sig)                          #Critical F value for p=.025  
  f.plow=1-3*(pf(f.x25,df1=f.df1.sig, df2=f.df2.sig, ncp=f.ncp33)-2/3)  #Prob(p<.025|ncp33% & p<.05)
  
  
  #Probability that the p-value is bigger than .04, .03, .02, and .01, given p<.05 and ncp=ncp33
  f.pp4=3*(pf(f.x4,df1=f.df1.sig, df2=f.df2.sig, ncp=f.ncp33)-2/3)
  f.pp3=3*(pf(f.x3,df1=f.df1.sig, df2=f.df2.sig, ncp=f.ncp33)-2/3)
  f.pp2=3*(pf(f.x2,df1=f.df1.sig, df2=f.df2.sig, ncp=f.ncp33)-2/3)
  f.pp1=3*(pf(f.x1,df1=f.df1.sig, df2=f.df2.sig, ncp=f.ncp33)-2/3)
  
  #within bins proportions
  f.prop5=mean(f.pp4); 
  f.prop4=mean(f.pp3-f.pp4); 
  f.prop3=mean(f.pp2-f.pp3); 
  f.prop2=mean(f.pp1-f.pp2); 
  f.prop1=mean(1-f.pp1)
}
#8.3 chi-tests 
if (length(c.value.sig)>0)  #if nonempty compute pp-values
{
  
  #Critical values, xc, for p=.05, .04, .03, .02, and .01
  c.x5=qchisq(.95,df=c.df.sig); c.x4=qchisq(.96, df=c.df.sig); c.x3=qchisq(.97,df=c.df.sig); 
  c.x2=qchisq(.98, df=c.df.sig); c.x1=qchisq(.99,df=c.df.sig) 
  
  #For binomial test
  c.x25 =qchisq(.975,df=c.df.sig)                                      #Critical x2 value for p=.025
  c.plow=1-3*(pchisq(c.x25,df=c.df.sig, ncp=c.ncp33)-2/3)              #Prob(p<.025|ncp33% & p<.05)
  
  #Probability that the p-value is bigger than .04, .03, .02, and .01, given p<.05 and ncp=ncp33
  c.pp4=3*(pchisq(c.x4,df=c.df.sig, ncp=c.ncp33)-2/3)
  c.pp3=3*(pchisq(c.x3,df=c.df.sig, ncp=c.ncp33)-2/3)
  c.pp2=3*(pchisq(c.x2,df=c.df.sig, ncp=c.ncp33)-2/3)
  c.pp1=3*(pchisq(c.x1,df=c.df.sig, ncp=c.ncp33)-2/3)
  
  #within bins proportions
  c.prop5=mean(c.pp4); c.prop4=mean(c.pp3-c.pp4); c.prop3=mean(c.pp2-c.pp3); 
  c.prop2=mean(c.pp1-c.pp2); c.prop1=mean(1-c.pp1)
}
#8.4 z-tests 
if (length(z.value.sig)>0)  #if nonempty compute pp-values
{
  #Critical values, xc, for p=.05, .04, .03, .02, and .01
  z.x5=qnorm(.975); z.x4=qnorm(.98); z.x3=qnorm(.985); z.x2=qnorm(.99); z.x1=qnorm(.995) 
  # For Binomial test	
  z.x25 =qnorm(.9875)                                        #Critical z value for p=.025 (two-sided); .9875 matches the t-test analogue above
  z.plow=1-3*(pnorm(z.x25,mean=1.5285687,sd=1)-2/3)           #Prob(p<.025|ncp33% & p<.05)
  #Probability that the p-value is bigger than .04, .03, .02, and .01, given p<.05 and ncp=ncp33
  z.pp4=3*(pnorm(z.x4,mean=1.5285687,sd=1)-2/3)
  z.pp3=3*(pnorm(z.x3,mean=1.5285687,sd=1)-2/3)
  z.pp2=3*(pnorm(z.x2,mean=1.5285687,sd=1)-2/3)
  z.pp1=3*(pnorm(z.x1,mean=1.5285687,sd=1)-2/3)
  #within bins proportions
  z.prop5=z.pp4; z.prop4=z.pp3-z.pp4; z.prop3=z.pp2-z.pp3; z.prop2=z.pp1-z.pp2; z.prop1=1-z.pp1
}


#9 combine t,F,chi,Z
#proportion of all tests that are of each type
t.share=length(t.value.sig)/ktot
f.share=length(f.value.sig)/ktot
c.share=length(c.value.sig)/ktot
z.share=length(z.value.sig)/ktot

#Average proportions within the 4 types of tests
t.props=c(t.prop1, t.prop2, t.prop3, t.prop4, t.prop5)
f.props=c(f.prop1, f.prop2, f.prop3, f.prop4, f.prop5)
c.props=c(c.prop1, c.prop2, c.prop3, c.prop4, c.prop5)
z.props=c(z.prop1, z.prop2, z.prop3, z.prop4, z.prop5)

#overall proportions (i.e., THE GREEN LINE)
green=100*(t.props*t.share + f.props*f.share + c.props*c.share + z.props*z.share)


#10 The blue line (observed p-curve)   

#Put each p-value in a bin between 0 and .05
ps=ceiling(c(all.p.sig)*100)/100
#Count them
blue01=sum(ps<=.01)/ktot; blue02=sum(ps==.02)/ktot; blue03=sum(ps==.03)/ktot; 
blue04=sum(ps==.04)/ktot; blue05=sum(ps==.05)/ktot; 
#combine
blue=c(blue01,blue02,blue03,blue04,blue05)*100
#Note: the table() command could have been used instead, but it is a pain when there are no p-values in a given range
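#Binning example (added): p=.012 gives ceiling(1.2)/100=.02, so it is counted in the .02 bar;
#p=.008 falls in the .01 bar (caught by ps<=.01); any p in (.04,.05) lands in the .05 bar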

#11 Red line
red=c(20,20,20,20,20)

#12 Carry out binomial test
#Note: for t and Z tests the critical value corresponds to p=.0125 in one tail (p=.025 two-sided); for F and chi2 it is p=.025 in the single (right) tail


#12.1 Combine the prob(p<.025) for each set of tests
plows=c(t.plow, f.plow, c.plow, z.plow)  
#12.2 Compute the observed number of p<.025 results
low.obs=sum(all.p.sig<=.025)
#12.3 Right skew: Compare the observed share of p<.025 with a null of 50:50 and an alternative of more p<.025 than expected
binom.r=1-pbinom(q=low.obs-1, p=.5, size=ktot)     #pbinom() computes P(X<=x0); we want P(X>=x0), so we evaluate at x0-1 and take 1-prob
#12.4 Left skew: Compare the observed share of p<.025 with a null of 50:50 and an alternative of fewer p<.025 than expected
binom.l=pbinom(q=low.obs, p=.5, size=ktot)         #Here the default P(X<=x0) is what we want
#12.5 33% power: Compare the observed share of p<.025 with the expected share (~72%) implied by the combination of expected shares under ncp33%
#              The probability of p<.025|ncp33 differs slightly across tests, hence the Poisson binomial distribution (see reference at top of this document)
binom.33=ppoibin(kk=low.obs,pp=plows)              
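
#Illustrative call (added; ppoibin(kk, pp) is the CDF from the poibin package): with three
#tests whose Prob(p<.025 | 33% power) values are .70, .72, and .75, and one observed p<.025,
# ppoibin(kk=1, pp=c(.70,.72,.75))   #P(at most 1 low p-value under 33% power)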


#13.POWER ESTIMATION

#13.1 SET OF FUNCTIONS 1. COMPUTE GAP BETWEEN POWER AND DESIRED POWER FOR A GIVEN NCP 
# (minimize these in the next step to solve for the ncp that gives the desired power)
ncp_error.t = function(delta, power, x, df)      pt(x, df = df, ncp = delta) - (1-power)   #if this equals 0, we found the ncp.
ncp_error.f = function(delta, power, x, df1,df2) pf(x, df1 = df1, df2=df2, ncp = delta) - (1-power)   
ncp_error.c = function(delta, power, x, df)      pchisq(x, df = df, ncp = delta) - (1-power)   
ncp_error.z = function(delta, power, x)          pnorm(x, mean = delta,sd=1) - (1-power)   

#13.2 SET OF FUNCTIONS 2: MINIMIZE FUNCTIONS ABOVE
#t-test
getncp.t =function(df, power)   {      
  xc=qt(p=.975, df=df) # critical t-value
  return(uniroot(ncp_error.t, c(0, 37.62), x = xc, df =df, power=power)$root)   }  

#F-test
getncp.f =function(df1,df2, power)   {      
  xc=qf(p=.95, df1=df1,df2=df2) # critical F-value
  return(uniroot(ncp_error.f, c(0, 37.62), x = xc, df1 = df1,df2=df2, power=power)$root)  }


#chisq-test
getncp.c =function(df, power)   {      
  xc=qchisq(p=.95, df=df) # critical c-value
  return(uniroot(ncp_error.c, c(0, 37.62), x = xc, df = df, power=power)$root)   }

#Normal
getncp.z =function(power)   {      
  xc=qnorm(p=.975) # critical Z-value
  return(uniroot(ncp_error.z, c(0, 37.62), x = xc, power=power)$root)   }             
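
#Sanity check (added; approximate): for a z-test, getncp.z(power=.80) should return ~2.80,
#since the solution satisfies qnorm(.975)+qnorm(.80) = 1.96+0.84 = 2.80
# getncp.z(power=.80)   #~2.80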

# 13.3 CREATE PP-VALUES FOR EACH OF THE FOUR DISTRIBUTIONS, ASSESSING HOW WELL A GIVEN POWER_EST FITS 
powerfit.t=function(t_obs, df_obs, power_est)    {
  ncp_est=mapply(getncp.t,df=df_obs,power=power_est)  #find the ncp that gives each test power = power_est
  p_larger=pt(t_obs,df=df_obs,ncp=ncp_est)            #prob(T < t_obs) given ncp_est
  ppr=(p_larger-(1-power_est))/power_est              #condition on p<.05
  return(ppr)   }

powerfit.f=function(f_obs, df1_obs, df2_obs, power_est)    {
  ncp_est=mapply(getncp.f,df1=df1_obs, df2=df2_obs,power=power_est)  #find the ncp that gives each test power = power_est
  p_larger=pf(f_obs,df1=df1_obs,df2=df2_obs, ncp=ncp_est)        #prob(F < f_obs) given ncp_est
  ppr=(p_larger-(1-power_est))/power_est          #condition on p<.05
  return(ppr)   }

powerfit.z=function(z_obs, power_est)    {
  ncp_est=mapply(getncp.z,power=power_est)  
  p_larger=pnorm(z_obs,mean=ncp_est)        
  ppr=(p_larger-(1-power_est))/power_est          
  return(ppr)     }


powerfit.c=function(c_obs, df_obs, power_est)    {
  ncp_est=mapply(getncp.c,df=df_obs,power=power_est)  
  p_larger=pchisq(c_obs,df=df_obs,ncp=ncp_est)        
  ppr=(p_larger-(1-power_est))/power_est          
  return(ppr)   }

#13.4 STACK UP ALL THE PP-VALUES INTO A VECTOR AND COMPARE THEM TO THE UNIFORM DISTRIBUTION USING THE KOLMOGOROV-SMIRNOV TEST

powerfit.all=function(power_est)
{
  ppr.all=c()
  #for each kind of test, check whether there are any significant values; if so, append its pp-values to ppr.all
  if (length(t.value.sig)>0) ppr.all=c(ppr.all, powerfit.t(t_obs=t.value.sig, df_obs=t.df.sig, power_est=power_est))
  if (length(f.value.sig)>0) ppr.all=c(ppr.all, powerfit.f(f_obs=f.value.sig, df1_obs=f.df1.sig, df2_obs=f.df2.sig, power_est=power_est))
  if (length(z.value.sig)>0) ppr.all=c(ppr.all, powerfit.z(z_obs=z.value.sig, power_est=power_est))
  if (length(c.value.sig)>0) ppr.all=c(ppr.all, powerfit.c(c_obs=c.value.sig, df_obs=c.df.sig, power_est=power_est))
  KSD=ks.test(ppr.all,punif)$statistic                #KS test on the resulting pprs
  return(KSD)
}
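
#Usage note (added): powerfit.all(.33) returns the KS distance D between the observed
#pp-values and the uniform distribution under the assumption that every study had 33%
#power; smaller D means the assumed power fits the observed p-curve better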

#13.5 FUNCTION THAT COMPUTES FIT FOR EACH LEVEL OF POWER AND PLOTS IT


plotfit=function()
{
  # Fit will be evaluated at every possible value of power between 5.1% and 99% in steps of 1%, stored in the vector fit
  fit=c()                                          #Create empty vector
  fit=powerfit.all(.051)                           #First evaluate fit for power of 5.1%, the lowest one can get for non-directional tests like chi2 and F
  for (i in 6:99)   fit=c(fit,powerfit.all(i/100)) #Now do 6% to 99%
  # Find the minimum
  mini=match(min(fit),fit)       #index of the power level with the best (smallest) fit value
  hat=(mini+4)/100               #convert that index into a power level: the i-th value considered is (i+4)/100
  #Plot results
  #create the x-axis
  x.power=seq(from=5,to=99)/100 
  #Draw the line
  par(mar=c(5.1,8.1,4.1,2.1))
  plot(x.power,fit,xlab="Underlying Power", ylab="",ylim=c(0,1), main="")  
  #Make red dot at the estimate
  points(hat,min(fit),pch=19,col="red",cex=2)    
  #Put a label with the estimate value
  sign="="
  if (hat<.06) sign="<"
  text(min(.7,max(.28,hat)),min(fit)-.1,paste0("Estimated Power ",sign," ",hat*100,"%"))
  mtext(c("Perfect","Terrible"),side=2,line=3,at=c(0,1),las=1,cex=1.25,col=c("blue","red"))
  mtext("How Good Is the Fit?",side=2,line=6.5,cex=1.5)
  mtext("(Kolmogorov-Smirnov D Stat)",side=2,line=5.5,col="gray")
  mtext("Do we have a good estimate of power?",side=3,line=1.75,cex=1.5,at=0.4)
  mtext("If you see a V-Shape with a low minimum-->yes",side=3,line=0.5,cex=1.25,at=0.4)
  
}
if(isTRUE(showPlot)){
#The original script drew two graphs in a single chart; this adaptation draws only the p-curve
#par(mfrow=c(2,1)) 
par(mfrow=c(1,1)) 

#14 First, the p-curve itself
#Define x-axis as p-values (.01, .02..)
x = c(.01,.02,.03,.04,.05)

#Plot the observed p-curve

plot(x,blue,   type='l', col='dodgerblue2',  main="",lwd=2, xlab="", ylab="", 
     xaxt="n",yaxt="n", xlim=c(0.01,0.055), ylim=c(0,105), bty='L', las=1);  	

#x-axis value labels
x_=c(".01",".02",".03",".04",".05")
axis(1,at=x,labels=x_)
#y-axis value labels
y_=c("0%","25%","50%","75%","100%")
y=c(0,25,50,75,100)
axis(2,at=y,labels=y_,las=1,cex.axis=.75)

#Add y-axis label
mtext("Percentage of test results",font=2,side=2,line=2.75,cex=1.25)
#Add x-axis label
mtext("p            ",font=4,side=1,line=2.5,cex=1.25)
mtext(" -value",font=2,side=1,line=2.5,cex=1.25)


#Add small points at the observed frequencies
points(x,blue,type="p",pch=20,bg="dodgerblue2",col="dodgerblue2")
#Add value-labels
text(x+.00075,blue+5,percent(round(blue)/100),col='black', cex=.75)
#Add red and green lines
lines(x,red,   type='l', col='firebrick2',    lwd=1.5, lty=3)
lines(x,green, type='l', col='springgreen4',  lwd=1.5, lty=5)

#Legend
#By default its x-position, legendx, is in the middle
legendx=.035 ; 
#Move left for p-curves where more than 80% of p-values are =.04 or =.05, so that the legend does not touch the blue line
if (blue04>.80 | blue05>.80) legendx=.02
#Print legend
legend(legendx, 100, c('Observed p-curve','Null of 33% power', 'Null of zero effect'), 
       box.col="white",lty=c(1,5,3), cex=.75,lwd=c(1,1),col=c('dodgerblue2','springgreen4', 'firebrick2'));

#ADD THE POWER FIT CHART
#plotfit()
}
#PRINT OUT RESULTS
printout=function()
{cat("\nTest for right-skew....Binomial: ",binom.r,"   Continuous: Z=",Zppr,"  p=",p.Zppr)
  cat("\nTest for 33%....Binomial: ",binom.33,"   Continuous: Z=",Zpp33,"  p=",p.Zpp33)
  cat("\nTest for left-skew....Binomial: ",binom.l,"   Continuous: Z=",Zppl,"  p=",p.Zppl)
}

if(isTRUE(showText)){
printout()
}

}
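
#Example call (added; assumes ni holds the value placed inside r( ), which the code
#downstream treats as the degrees of freedom, i.e. n-2):
# pcurve_cor(ni = c(88, 147, 77), ri = c(.21, .246, .47))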


# 
# ################################################
# #15 Cumulative test
# #MAKE CUMULATIVE TEST CONDITIONAL ON SIGNIFICANCE: IF RIGHT-SKEW IS SIGNIFICANT, EXCLUDE LOWEST P-VALUES,
# #BUT IF RIGHT SKEW IS N.S., EXCLUDE HIGHEST P-VALUES
# #OR MAYBE DO IT BOTH WAYS ALWAYS
# 
# #If right skew is significant, assess robustness to excluding the most significant finding, one at a time
# all.zppr = sort(qnorm(c(t.ppr,  f.ppr  ,c.ppr,  z.ppr )))
# all.zppl = sort(qnorm(c(t.ppl,  f.ppl  ,c.ppl,  z.ppl )))
# all.zpp33= sort(qnorm(c(t.pp33,f.pp33 ,c.pp33, z.pp33 )))
# 
# droplow.zr=droplow.zl=droplow.z33=drophigh.zr=drophigh.zl=drophigh.z33=c()
# for (i in 1:(ktot))
# {
#   droplow.zr[i] =sum(all.zppr[i:ktot]/sqrt(ktot-i+1))
#   droplow.zl[i] =sum(all.zppl[i:ktot]/sqrt(ktot-i+1))
#   droplow.z33[i]=sum(all.zpp33[i:ktot]/sqrt(ktot-i+1))
#   
#   drophigh.zr[i] =sum(all.zppr[1:(ktot-i+1)]/sqrt(ktot-i+1))
#   drophigh.zl[i] =sum(all.zppl[1:(ktot-i+1)]/sqrt(ktot-i+1))
#   drophigh.z33[i]=sum(all.zpp33[1:(ktot-i+1)]/sqrt(ktot-i+1))
# }
# 
# 
# 
# #15.2 FUNCTION THAT PLOTS RESULTS
# plotdrop=function(var)
# {
#   #Plot the dots
#   plot(0:(ktot-1),pnorm(var),xlab="",ylab="",type="b",yaxt="n",main="",cex.main=1.15,ylim=c(0,1))
#   #Add marker in results with 0 drops
#   points(0,pnorm(var[1]),pch=19,cex=1.6)
#   #Red line at p=.05
#   abline(h=.05,col="red")  #Red line at p=.05
#   #Y-axis value labels
#   axis(2,c(.01,.05,.1,seq(from=.2,to=.9,by=.10)),las=1,cex.axis=.95)
# }
# 
# #15.3 RUN PLOT FUNCTION 6 TIMES
# #Put all graphs together
# dev.off()  
# par(mfrow=c(3,2),mar=c(4,4,1,2),mgp=c(2.5,1,0),oma=c(5,14,5,1)) 
# #Plot(1)
# plotdrop(droplow.zr)
# mtext(side=2,line=4,"P-value Overall Test",font=2,cex=.85)
# mtext(side=2,line=3,"(Stouffer's Method)",font=3,cex=.75)
# #Right Skew label
# mtext("Right Skew",line=8,side = 2,cex=1.2,las=1,col="Blue") 
# #Low to high label
# mtext(bquote("Drop"~italic(k)~bold("lowest")~"original p-values"),line=1,side = 3,cex=1.5,las=1) 
# #Plot(2)
# plotdrop(drophigh.zr)
# mtext(bquote("Drop"~italic(k)~bold("highest")~"original p-values"),line=1,side = 3,cex=1.5,las=1) 
# #Plot (3)
# plotdrop(drophigh.z33)
# mtext(side=2,line=4,"P-value Overall Test",font=2,cex=.85)
# mtext(side=2,line=3,"(Stouffer's Method)",font=3,cex=.75)
# #33% Power label
# mtext("33% Power",line=8,side = 2,cex=1.2,las=1,col="springgreen4") 
# #Plot (4)
# plotdrop(droplow.z33)
# #Plot (5)
# plotdrop(drophigh.zl)
# mtext(side=2,line=4,"P-value Overall Test",font=2,cex=.85)
# mtext(side=2,line=3,"(Stouffer's Method)",font=3,cex=.75)
# mtext(side=1,line=2.5,"K Tests Dropped From p-curve",cex=1.15,font=2)
# #Left Skew label
# mtext("Left Skew",line=8,side = 2,cex=1.2,las=1,col="red") 
# #Plot (6)
# plotdrop(droplow.zl)
# #x-axis label
# mtext(side=1,line=2.5,"K Tests Dropped From p-curve",cex=1.15,font=2)
# #Legend (winging it for location)
# op=par(usr=c(0,1,0,1),xpd=NA)   #allow going outside of plot 6 and recalibrate dimensions to be 0-1 in x and y
# legend(-.6,-.25,horiz=TRUE,pch=c(19,1),cex=1.4, legend=c("Including all p-values","Dropping p-values"))   
# #so the legend is placed 60% of a chart to the left of 0 of plot #6, and 25% of a chart below it.
# 
# 