Nothing
## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE, tidy = FALSE)
options(width = 80)
library(knitr)
library(rmarkdown)
library(rmcorr)
library(ggplot2)
library(dplyr)
library(patchwork)
library(esc)
library(psych)
## ----eval = FALSE-------------------------------------------------------------
# install.packages("dplyr")
# require(dplyr)
#
# install.packages("esc")
# require(esc)
#
# install.packages("patchwork")
# require(patchwork)
## ----echo = FALSE-------------------------------------------------------------
N.Marusich2016 <- length(unique(marusich2016_exp2$Pair))
k.Marusich2016 <- length(unique(marusich2016_exp2$SourceReliablity))
total.obs.Marusich2016 <- length(marusich2016_exp2$Pair)
## -----------------------------------------------------------------------------
overfit.plot <-
ggplot(data = marusich2016_exp2, aes(x = MARS, y = HVT_capture)) +
geom_point(aes(colour = factor(Pair))) +
geom_smooth(method= "lm", level = 0.95) +
coord_cartesian(xlim = c(2,4), ylim=c(0,30)) +
ylab("Capture Time (seconds)") +
theme_minimal() +
theme(legend.position="none")
marusich2016_avg <- marusich2016_exp2 %>%
group_by(Pair) %>%
summarize(Mean_MARS = mean(MARS),
Mean_HVT_capture = mean(HVT_capture))
average.plot <-
ggplot(data = marusich2016_avg,
aes(x = Mean_MARS, y = Mean_HVT_capture)) +
geom_smooth(fullrange = TRUE, method= "lm", level = 0.95) +
coord_cartesian(xlim = c(2,4), ylim=c(0,30)) +
geom_point(aes(colour = factor(Pair))) +
xlab("Mean MARS") +
ylab("Mean Capture Time (seconds)") +
scale_colour_discrete(name = "Dyad") +
theme_minimal()
overfit.cor <- cor.test(marusich2016_exp2$MARS, marusich2016_exp2$HVT_capture)
average.cor <- cor.test(marusich2016_avg$Mean_MARS, marusich2016_avg$Mean_HVT_capture)
df.s <- rbind(overfit.cor$parameter, average.cor$parameter)
r.s <- rbind(round(rbind(overfit.cor$estimate, average.cor$estimate), digits = 2))
CI.s <- formatC(rbind(overfit.cor$conf.int,
average.cor$conf.int), digits = 2,
format = 'f')
p.vals <- rbind(round(overfit.cor$p.value, digits = 3),
prettyNum(average.cor$p.value, digits = 2,
drop0trailing = TRUE))
overfit.plot + average.plot
## -----------------------------------------------------------------------------
marusich.rmc <- rmcorr(participant = Pair,
measure1 = MARS,
measure2 = HVT_capture,
data = marusich2016_exp2)
rmc.plot <-
ggplot(data = marusich2016_exp2,
aes(x = MARS, y = HVT_capture, group = factor(Pair), color = factor(Pair))) +
geom_point(aes(colour = factor(Pair))) +
geom_line(aes(y = marusich.rmc$model$fitted.values)) +
theme_minimal() +
scale_colour_discrete(name = "Dyad") +
ylab("Capture Time (seconds)")
rmc.r <- round(marusich.rmc$r, 2)
rmc.CI <- round(marusich.rmc$CI, 2)
rmc.p <- formatC(marusich.rmc$p, format = "fg", digits = 2)
# prettyNum(average.cor$p.value, digits = 2,
# drop0trailing = TRUE))
rmc.plot
## -----------------------------------------------------------------------------
N.vals <- seq(5, 100, by = 1)
sd.Z <- 1/sqrt(N.vals-3)
sd.overfit <- data.frame(cbind(N.vals, sd.Z))
sd.cor <- sd.Z[N.Marusich2016-4] #Have to subtract 4 because N.vals starts at 5, not 1
sd.overf <- sd.Z[total.obs.Marusich2016-4]
#We could determine the inflated sample size for the overfit model using its degrees of freedom + 2
df.s + 2 == total.obs.Marusich2016
sd.overfit.plot <-
ggplot(data = sd.overfit,
aes(x = N.vals, y = sd.Z)) +
coord_cartesian(xlim = c(0,100), ylim=c(0, 0.75)) +
geom_point(alpha = 0.25) +
geom_segment(x = 28, y = sd.cor, xend = 84,
yend = sd.cor, linetype = 2) +
geom_segment(x = 84, y = sd.cor, xend = 84,
yend = sd.overf, colour = "red",
linetype = 2) +
xlab("Analysis Sample Size") +
ylab(expression(paste("Fisher ", italic("Z"), " standard deviation"))) +
annotate(geom = "point", x = 28, y = sd.cor,
colour = "black", size = 2) +
annotate(geom = "point", x = 84, y = sd.overf,
colour = "red", size = 2) +
theme_minimal()
sd.overfit.plot
## -----------------------------------------------------------------------------
#Calculate the expected correlation using the reported p-value and actual sample size
#esc_t assumes p-value is two-tailed and uses a Fisher Z transformation
calc.z <- esc_t(p = as.numeric(p.vals[1]),
totaln = N.Marusich2016,
es.type = "r")$es
reported.r = as.numeric(r.s[1])
#Overfit?
fisherz2r(calc.z) != abs(reported.r)
#Note calc.z will always be positive using esc_t() this way
#If the reported.r is not a Fisher Z value - either it should be transformed or the calc.z should be transformed
fisherz2r(calc.z)
reported.r
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.