knitr::opts_chunk$set(echo = TRUE)

pooledmaRket

PooledMarketR is an R package built to provide the Pooled Market Dataset.

This dataset pools together the data from four similar prediction market on the results of systematic replication studies.

The systematic replication studies are:

And the accompanying prediction markets are:

In the dataset the prediction markets are referred to as 'projects'. I would recommend reading the above studies before using the package. The studies will explain the methodologies and provide context to the data.

What this package contains

How to use this package

The below example calculates the average survey belief and the final market price for each of the 102 studies and the collates them into a single table.

library(dplyr)
library(tidyr)
library(devtools)

install_github("michaelbgordon/pooledmaRket")

library(pooledmaRket)

aggregated_survey <-  survey_data %>% 
  group_by(project,finding_id) %>% 
  summarise(average_survey_response = mean(response))

aggregated_market <- market_data %>% 
  group_by(project, finding_id) %>% 
  arrange(time_stamp) %>% 
  summarise(final_market_price = dplyr::last(price))


study_predictions <- finding_data %>% 
  left_join(aggregated_survey) %>% 
  left_join(aggregated_market) 

The data can be used to assess and compare the characteristics of various forecasting methods. Such as comparing survey with prediction market results

library(ggplot2)
library(ggsci)

graph_data <- study_predictions %>% 
  mutate(`Replication Status` = ifelse(replicated == 0, 'Unsuccessful Replication', 'Successful Replication')) %>% 
  arrange(final_market_price) %>% 
  mutate(graph_order = c(1:103))



ggplot(graph_data) + 
  geom_point(aes(x=final_market_price, y=average_survey_response, colour = `Replication Status`), size = 3) +
  scale_color_manual(values = c( "#3CB371","#EE5C42")) +
  labs(y = "Average Survey Response", x = "Final Market Price") +
  theme_light() +
  geom_vline(xintercept = 0.5,linetype='dashed') +
  geom_hline(yintercept = 0.5,linetype='dashed') +
  ggplot2::annotate("text", label = stringr::str_wrap("Predicted to replicate by survey",20), x =  0, y = 0.55, size = 5,   colour = "#3CB371",hjust = 0) +
  ggplot2::annotate("text", label = stringr::str_wrap("Predicted to not replicate by survey",20), x =  0, y = 0.45, size = 5, colour = "#EE5C42",hjust = 0) +
  ggplot2::annotate("text", label = stringr::str_wrap("Predicted to replicate by market",20), x =  0.52, y = 1, size = 5, colour = "#3CB371",hjust = 0) +
  ggplot2::annotate("text", label = stringr::str_wrap("Predicted to not replicate by market",20), x =  0.48, y = 1, size = 5, colour = "#EE5C42",hjust = 1) +
  scale_x_continuous(limits=c(0, 1), breaks=c(0,0.25,0.5,0.75,1)) +
  scale_y_continuous(limits=c(0, 1),breaks=c(0,0.25,0.5,0.75,1)) +
  theme(legend.justification=c(1,0), 
        legend.position=c(1,0),
    panel.grid = element_blank(),
    axis.text = element_text(colour = 'black', size = 15),
    axis.title = element_text(colour = 'black', size = 20), 
    legend.title = element_text(colour = 'black', size = 20), 
    legend.text = element_text(colour = 'black', size = 15), 
  ) 

Comparing final market prices with outcomes:

ggplot(graph_data) +
  geom_point(aes(x=final_market_price, y=graph_order, colour = `Replication Status`), size = 3) +
  scale_color_manual(values = c( "#3CB371","#EE5C42")) +
  theme_light() +
  geom_vline(xintercept = 0.5,linetype='dashed') +
  labs(y = "Findings (ordered by price)", x = "Final Market Price") +
  theme(legend.justification=c(1,0), 
        legend.position=c(1,0),
        panel.grid = element_blank(),
        axis.text.y=element_blank(),
        axis.text = element_text(colour = 'black', size = 15),
        axis.title = element_text(colour = 'black', size = 20), 
        legend.title = element_text(colour = 'black', size = 20), 
        legend.text = element_text(colour = 'black', size = 15), 
  )

Compare the distributions of different survey aggregations and prediction market

graph_data <- graph_data %>% 
  rename(`Final Market Price` = final_market_price, 
         `Mean Survey Response` = average_survey_response) %>% 
  gather(`Final Market Price`,`Mean Survey Response`, key = measure, value = forecast)

other_aggregations <- survey_data %>% 
  group_by(project,finding_id) %>% 
  summarise(Mean = mean(response),
            Median = median(response),
            Voting = mean(round(response,0))) %>% 
  left_join(aggregated_market) %>% 
  rename(`Final Market Price` = final_market_price) %>% 
  gather(Mean, Median, Voting, `Final Market Price`, key =measure, value = forecast) 



figurepart1 <- ggplot(graph_data, aes(x= forecast, y = measure, colour = measure)) +
  scale_color_aaas() +
  labs(y = NULL, x = "Forecast (%)") +
  theme_light() +
  theme(
    legend.position = "none",
    strip.background =element_rect(fill="white"),
    strip.text = element_text(colour = 'black', size = 15),
    plot.title = element_text(colour = 'black', size = 20, hjust = 0),
    panel.grid = element_blank(),
    axis.text = element_text(colour = 'black', size = 15),
    axis.title = element_text(colour = 'black', size = 20)
  )  + 
  geom_boxplot(color = "gray60", outlier.alpha = 0,orientation = 'y') +
  geom_jitter(size = 3, alpha = 1, height = 0.1) +
  facet_wrap(~project) +
  labs(y = NULL, x = NULL) +
  labs(tag = "A")






figurepart2 <- ggplot(other_aggregations, aes(x= forecast, y = measure, colour = measure)) +
  scale_color_aaas() +
  labs(y = NULL, x = "Forecast (%)") +
  theme_light() +
  theme(
    legend.position = "none",
    strip.background =element_rect(fill="white"),
    strip.text = element_text(colour = 'black', size = 15),
    plot.title = element_text(colour = 'black', size = 20, hjust = 0),
    panel.grid = element_blank(),
    axis.text = element_text(colour = 'black', size = 15),
    axis.title = element_text(colour = 'black', size = 20)
  )  +
  geom_boxplot(color = "gray60", outlier.alpha = 0,orientation = 'y') +
  geom_jitter(size = 3, alpha = 1, height = 0.1) +
  labs(tag = "B")


gridExtra::grid.arrange(figurepart1 , figurepart2  ,  nrow = 2)


MichaelbGordon/pooledmaRket documentation built on March 10, 2021, 11:22 p.m.