In lupyanlab/programming-questionnaire: Programming Questionnaire

library(knitr)
library(shiny)
# Document-wide chunk defaults: hide source code, messages and warnings,
# render 4 x 4 figures at 144 dpi, and never cache chunk results.
opts_chunk$set(
  echo = FALSE,
  message = FALSE,
  warning = FALSE,
  fig.width = 4,
  fig.height = 4,
  dpi = 144,
  cache = FALSE
)

library(tidyverse)
library(ggridges)
library(patchwork)
library(magrittr)

library(programmingquestionnaire)
# Load the three data sets used throughout this report in a single call
# (presumably provided by the programmingquestionnaire package attached above).
data("questionnaire", "languages", "questions")

# Names of the 20 languages with the most rows in `languages`.
# `top_n()` keeps ties, so more than 20 names can come back.
top20_languages <- languages %>%
  count(language_name) %>%
  top_n(20, n) %>%
  pull(language_name)

# Identifiers of the questions analysed in this report. They are used to pick
# response columns, to look up question text, and as the dropdown choices.
question_names <- c(
  "cr1",
  "cp1",
  "repo1",
  "rec1",
  "cfo1",
  "cfo2",
  "env1",
  "env2",
  "inter1",
  "inter2",
  # agreement w/ other questions
  "oss1",
  "fp3",
  "fp4",
  "psych",
  # multiple choice questions
  "fp1",
  "pipe1",
  "computer"
)

# For every respondent, keep the languages rated above 4 in proficiency plus
# whichever language(s) carry that respondent's highest rating. The second
# condition keeps at least one language for respondents with nothing rated
# above 4 (assuming no missing ratings); ties at the maximum are all kept.
# NOTE(review): the accompanying text describes the cutoff as ">= 4", while
# this filter keeps only ratings strictly above 4 -- confirm which is intended.
most_proficient_languages <- languages %>%
  group_by(subj_id) %>%
  filter(proficiency > 4 | proficiency == max(proficiency)) %>%
  # Grouping is only needed for the per-respondent maximum; drop it so later
  # steps receive an ungrouped table.
  ungroup() %>%
  select(subj_id, language_name)

# Long-format responses: one row per respondent, question, and language the
# respondent is counted toward, restricted to the most common languages.
questionnaire_by_language <- questionnaire %>%
  # Keep the respondent id and every column whose name contains "_int".
  select(subj_id, contains("_int")) %>%
  # Strip "_int" so the column names line up with `question_names`. A plain
  # function is used instead of the deprecated `funs()` wrapper.
  rename_all(function(column) gsub("_int", "", column)) %>%
  select(subj_id, !!!question_names) %>%
  gather(question_name, response, -subj_id) %>%
  # Unanswered questions contribute nothing to the per-language summaries.
  drop_na() %>%
  # Inner join on the respondent id; the key is spelled out so the join does
  # not silently change if the two tables ever share another column name.
  merge(most_proficient_languages, by = "subj_id") %>%
  # `as_tibble()` replaces the deprecated `as_data_frame()` alias.
  as_tibble() %>%
  filter(language_name %in% top20_languages)

Which multiple choice questions do people who know different programming languages diverge on the most?

This question is difficult to answer because each person knows multiple programming languages, and not all at the same level of proficiency. As a result:

- Responses are only counted toward programming languages with high proficiency (>= 4). If a person did not report any programming languages with proficiency >= 4, that person's most proficient language (or languages, in case of a tie) was used instead.
- Only the 20 most commonly known languages are reported.

For each question, divergence was calculated as the difference between the highest and the lowest mean response across languages, where each language's mean is taken over the people counted toward that language. The results are reported in the table below. The distribution of responses for each language can be viewed in the ridgeline plot below the table.

# Checkbox that toggles the table of question texts; unchecked on load.
# `value` is documented as a logical, so pass FALSE rather than the number 0.
checkboxInput(
  inputId = "show_multiple_choice_questions_table",
  label = "Show table of multiple choice questions",
  value = FALSE
)

# Show the id and text of every analysed question, but only while the
# "show table" checkbox is ticked.
conditionalPanel(
  condition = 'input.show_multiple_choice_questions_table == 1',
  renderTable(
    {
      questions %>%
        filter(question_name %in% question_names) %>%
        select(question_name, question_text)
    },
    caption = "Multiple choice questions.",
    caption.placement = "top"
  )
)

# Table of questions ranked by divergence, i.e. by how far apart the highest
# and lowest per-language mean responses are for each question.
renderTable({
  questionnaire_by_language %>%
    split(.$question_name) %>%
    map_dfr(function(question_by_language) {
      # Mean response of each language for this one question.
      language_means <- question_by_language %>%
        group_by(language_name) %>%
        summarize(response = mean(response)) %>%
        pull(response)

      # `tibble()` replaces the deprecated `data_frame()` constructor.
      tibble(
        question_name = question_by_language$question_name[[1]],
        divergence = max(language_means) - min(language_means)
      )
    }) %>%
    # Most divergent questions first.
    arrange(desc(divergence))
})

# Dropdown choosing which question the text and plot below refer to.
# `selected` must be one of the choice values, not a position: a bare 1 is
# coerced to "1", matches no choice, and only appeared to work because the
# browser falls back to the first option. Pass the first choice explicitly.
selectInput(
  inputId = "question_name",
  label = "Question",
  choices = question_names,
  selected = question_names[[1]]
)

# Print the full text of the question currently chosen in the dropdown.
renderText({
  questions %>%
    filter(question_name == input$question_name) %>%
    pull(question_text)
})

# Two side-by-side panels for the selected question: ridgeline densities of
# the responses per language (left) and the number of responses per language
# (right). Both panels list the languages in order of mean response.
renderPlot({
  question_by_language <- filter(
    questionnaire_by_language,
    question_name == input$question_name
  )

  language_means <- question_by_language %>%
    group_by(language_name) %>%
    summarize(response = mean(response))

  # Languages sorted by ascending mean; used as factor levels so that both
  # panels share the same axis order.
  ordered_languages <- language_means %>%
    arrange(response) %>%
    pull(language_name)

  question_by_language$language_ordered <- factor(
    question_by_language$language_name,
    levels = ordered_languages
  )
  language_means$language_ordered <- factor(
    language_means$language_name,
    levels = ordered_languages
  )

  dist_plot <- ggplot(question_by_language) +
    aes(response, language_ordered) +
    geom_density_ridges(bandwidth = 0.5) +
    # Vertical marker at each language's mean, drawn from that language's
    # baseline (its integer factor code) up 0.9 units.
    geom_segment(
      data = language_means,
      aes(
        x = response,
        xend = response,
        y = as.numeric(language_ordered),
        yend = as.numeric(language_ordered) + .9
      )
    ) +
    ylab("") +
    theme_ridges()

  # Tallest bar in the count panel; named `max_count` so it does not shadow
  # ggplot2's `ylim()` function.
  max_count <- max(count(question_by_language, language_ordered)$n)

  hist_plot <- ggplot(question_by_language) +
    aes(language_ordered) +
    # `geom_bar()` counts rows per discrete x value by default; it replaces
    # `geom_histogram(stat = "count")`, which draws the same bars.
    geom_bar() +
    # NOTE: `..count..` is deprecated in recent ggplot2 in favour of
    # `after_stat(count)`; kept for compatibility with older installations.
    stat_count(geom = "text", aes(label = ..count..), hjust = -0.5) +
    labs(x = "") +
    theme_ridges() +
    # Flip so languages run down the side, and leave headroom past the
    # longest bar for its count label.
    coord_flip(ylim = c(0, max_count + 5))

  dist_plot + hist_plot +
    plot_layout(widths = c(0.6, 0.4))
})