# Global knitr chunk options for this report: collapse source and output,
# silence warnings and messages, and hide the code (echo = FALSE).
knitr::opts_chunk$set(
  collapse = TRUE,
  warning = FALSE,
  message = FALSE,
  echo = FALSE,
  comment = "#>",
  # fig.path is a prefix that knitr pastes in front of the chunk label, so the
  # trailing slash is what makes "figures" a directory instead of a file-name
  # prefix (e.g. "figuresunnamed-chunk-1.png").
  fig.path = "figures/"
)

library(saaabstracts)
library(readxl)
library(tidyverse)
library(knitr)
library(glue)

Introduction

How many sessions do we have?

For scheduling purposes we can combine the general and organised sessions, but first we have to pull out the after-hours sessions and set them aside.

library(readxl)

# General-session posters are on the first sheet of the workbook. The session
# number is stored in the `cuts` column, so mirror it into `Session Id`.
gen_poster_session_posters_individual <-
  read_excel("../data/raw_data/02 - poster_sessions_FINAL(oct31).xlsx")
gen_poster_session_posters_individual[["Session Id"]] <-
  gen_poster_session_posters_individual[["cuts"]]
# (author's note: 422 posters in the general session)

# The individual after-hours posters are on a different sheet of the same
# workbook (very inconvenient!); the first 11 rows of that sheet are skipped.
ah_poster_session_posters_individual <-
  read_excel(
    "../data/raw_data/02 - poster_sessions_FINAL(oct31).xlsx",
    sheet = "AFTER_HOURS",
    skip = 11
  )
ah_poster_session_posters_individual[["Session Id"]] <-
  ah_poster_session_posters_individual[["cuts"]]

# Keep only the after-hours columns that also exist in the general table, so
# the two tables can be stacked.
ah_poster_session_posters_individual <-
  ah_poster_session_posters_individual[
    names(ah_poster_session_posters_individual) %in%
      names(gen_poster_session_posters_individual)
  ]

# Append the after-hours rows to the general-session rows.
gen_poster_session_posters_individual <-
  rbind(
    gen_poster_session_posters_individual,
    ah_poster_session_posters_individual
  )

# This workbook holds the organised and after-hours posters, but not the
# general posters.
all_abstracts <-
  read_excel("../data/raw_data/Organized Session Abstracts_2018_AB.xlsx")

# Keep only the presenter rows that belong to a poster symposium.
org_and_after_hours_posters_individual <-
  filter(
    all_abstracts,
    `Session Type` == "Poster Symposium",
    Role == "Presenter"
  )
# (author's note: 196 posters in org + ah sessions)

# Combine the general, organised and after-hours individual posters.
# Only the columns that exist in both tables can be stacked.
common_names <-
  intersect(
    names(gen_poster_session_posters_individual),
    names(org_and_after_hours_posters_individual)
  )

# all_of() marks `common_names` as an external character vector of column
# names, so it can never be confused with a column called "common_names"
# (passing the bare vector to select() is deprecated for that reason).
gen_poster_session_posters_individual <-
  gen_poster_session_posters_individual %>%
  select(all_of(common_names))

org_and_after_hours_posters_individual <-
  org_and_after_hours_posters_individual %>%
  select(all_of(common_names))

# Stack the two tables and drop the rows that have no poster title.
all_posters_individual <-
  bind_rows(
    gen_poster_session_posters_individual,
    org_and_after_hours_posters_individual
  ) %>%
  filter(!is.na(Title))

We have r nrow(all_posters_individual) posters altogether, with r nrow(gen_poster_session_posters_individual) in the general session, and r nrow(org_and_after_hours_posters_individual) in the organised sessions (including after hours posters).

Let's remove the after hours sessions from all the posters, since we know they will be assigned to Th-evening:

# How many after-hours sessions? They are taken from the first nine rows of
# the AFTER_HOURS sheet; every non-digit is stripped from `ID` to obtain a
# numeric session id.
after_hours_poster_sessions_only <-
  read_excel(
    "../data/raw_data/02 - poster_sessions_FINAL(oct31).xlsx",
    sheet = "AFTER_HOURS"
  ) %>%
  slice(seq_len(9)) %>%
  mutate(`Session Id` = as.numeric(gsub("\\D", "", ID)))

# How many after-hours posters? Keep the posters whose session id is one of
# the after-hours ids and tag them with their time slot.
after_hours_poster_sessions_posters_individual <-
  all_posters_individual %>%
  filter(
    `Session Id` %in% after_hours_poster_sessions_only[["Session Id"]]
  ) %>%
  mutate(poster_session_time = "Th-evening")

# Everything else still has to be allocated to a day-part.
all_posters_individual_no_ah <-
  all_posters_individual %>%
  filter(
    !(`Session Id` %in% after_hours_poster_sessions_only[["Session Id"]])
  )

# Save the after-hours posters (with their session time) to a CSV.
write_csv(
  after_hours_poster_sessions_posters_individual,
  "../data/derived_data/after_hours_poster_sessions_all_posters.csv"
)

# Sanity check: list any after-hours session ids that are still present among
# the remaining posters (an empty result means none are left).
with(
  all_posters_individual_no_ah,
  `Session Id`[
    `Session Id` %in%
      after_hours_poster_sessions_posters_individual[["Session Id"]]
  ]
)

We have r length(unique(after_hours_poster_sessions_only$'Session Id')) sessions of after-hours posters, with a total of r nrow(after_hours_poster_sessions_posters_individual) posters. This leaves r nrow(all_posters_individual_no_ah) remaining to allocate in the day-parts.

100 posters are permitted in the after-hours session. We have r nrow(after_hours_poster_sessions_posters_individual) posters in the after hours session. That's r ifelse(nrow(after_hours_poster_sessions_posters_individual) <= 100, "great", "not ideal").

We have information from the SAA about how many posters can go into each session:

# Poster session durations, read from the "Posters1" sheet.
library(stringi)
poster_session_durations <-
  read_excel(
    "../data/raw_data/saa all organizedsession_timeslotABv3.xls",
    sheet = "Posters1"
  )

# Add a day-part label so the poster slots can be compared with the papers.
poster_session_durations <-
  poster_session_durations %>%
  mutate(
    start = as.character(start),
    end = as.character(end),
    # Classify each slot by its start time; a start time that is not listed
    # here gets NA.
    day_part = case_when(
      start == "1899-12-31 08:00:00" ~ "morning",
      start == "1899-12-31 10:30:00" ~ "morning",
      start == "1899-12-31 14:00:00" ~ "afternoon",
      start == "1899-12-31 17:00:00" ~ "evening"
    )
  ) %>%
  mutate(
    # Keep only the time-of-day part. In strings such as
    # "1899-12-31 08:00:00" character 11 is the space between date and time,
    # so trimws() removes the leading blank that substr() alone leaves behind.
    start = trimws(substr(start, 11, nchar(start))),
    end = trimws(substr(end, 11, nchar(end))),
    # Prefix the day-part with the first two letters of the day in title
    # case, e.g. "Th-evening".
    day_part = glue('{stri_trans_totitle(substr(day, 1, 2))}-{day_part}')
  )

poster_session_durations

# Posters still to place minus the total capacity of all poster slots:
# negative means free spaces, positive means too many posters.
diff_between_need_and_have <-
  nrow(all_posters_individual_no_ah) - sum(poster_session_durations[["N max"]])

There are spaces for a total of r sum(poster_session_durations$'N max') posters. We have received r nrow(all_posters_individual_no_ah) poster submissions. So we have r ifelse(diff_between_need_and_have < 0, glue('{abs(diff_between_need_and_have)} free spaces'), glue('{abs(diff_between_need_and_have)} more posters than available spaces')).

Th-evening is the 'after hours' session, so it already contains the after-hours poster sessions. We'll just omit it from the following available schedules, and we don't need to worry about clashes because it's after all the papers.

# See if we can do the poster day-parts individually: drop the Th-evening
# slot, then give every remaining slot a unique "poster-<day part>_<k>" label,
# where k numbers the slots that share the same day-part.
poster_session_durations_no_th_ev <-
  poster_session_durations %>%
  filter(day_part != "Th-evening") %>%
  mutate(poster_day_part = glue('poster-{day_part}')) %>%
  group_by(poster_day_part) %>%
  mutate(label = row_number()) %>%
  ungroup() %>%
  mutate(
    poster_day_part = glue('{poster_day_part}_{label}'),
    # the counter is only needed to build the label above
    label = NULL,
    # syntactic alias for the `N max` column
    n_max = `N max`
  )

We're not going to bother with clashes between people giving posters and people giving papers, because they are both 'presenting' roles, and a person can only have one of those.

# Check: look at everybody who has two or more rows in the abstracts and
# split their rows by whether the session type mentions "Poster".

# Rows of people with at least two entries, restricted to poster sessions
# (keep_poster_rows = TRUE) or to everything else (keep_poster_rows = FALSE).
# The result stays grouped by first and last name.
rows_for_multi_role_people <- function(keep_poster_rows) {
  all_abstracts %>%
    group_by(`First Name`, `Last Name`) %>%
    add_tally() %>%
    filter(n >= 2) %>%
    filter(grepl("Poster", `Session Type`) == keep_poster_rows) %>%
    mutate(uid = glue('{`First Name`} {`Last Name`}'))
}

people_with_2_non_poster_roles <- rows_for_multi_role_people(FALSE)

people_in_posters_with_2_roles <- rows_for_multi_role_people(TRUE)

# Any overlap? These are the poster people who also show up in a non-poster
# session.
people_with_posters_and_another_role <-
  with(
    people_in_posters_with_2_roles,
    uid[uid %in% people_with_2_non_poster_roles$uid]
  )

So let's go right onto the knapsack to see if we can fit them all in.

# Number of posters in each session. This one table feeds both the knapsack
# input and the session lookup further down, so the knapsack result is
# guaranteed to line up with the session ids.
posters_per_session <-
  all_posters_individual_no_ah %>%
  group_by(`Session Id`) %>%
  tally()

posters_to_allocate <- pull(posters_per_session, n)
sum(posters_to_allocate)

# Size of each slot to fill with posters.
poster_sessions_available <- poster_session_durations_no_th_ev$n_max
sum(poster_session_durations_no_th_ev$n_max)

library(adagio)
# mknapsack calling signature is: mknapsack(values, weights, capacities).
# A session is worth exactly as much as it weighs (its poster count), so the
# same vector is passed for both; the slot sizes are the capacities.
solution <- mknapsack(
  posters_to_allocate,
  posters_to_allocate,
  poster_sessions_available
)

# One entry per session, in the order of `posters_per_session`
# (presumably the index of the slot each session was packed into; confirm
# against the adagio documentation).
solution$ksack

# Poster sessions only: one row per session id with its knapsack result.
all_posters_individual_no_ah_sessions <-
  posters_per_session %>%
  select(`Session Id`) %>%
  mutate(ksack = solution$ksack)

# Put the results back on the individual posters. The join key is spelled
# out so that it cannot silently change if the tables gain shared columns.
all_posters_individual_no_ah_ksack <-
  all_posters_individual_no_ah %>%
  left_join(all_posters_individual_no_ah_sessions, by = "Session Id")

# Show the individual posters together with the knapsack result of their
# session. (The original line ended in a dangling `%>%`, which piped this
# table into the write.csv() call below.)
all_posters_individual_no_ah_ksack

# Output the results.
# NOTE(review): the original wrote `all_of_the_day_parts`, an object that is
# never created in this document, so the call could not run. The per-poster
# allocation table is written instead; confirm this is the intended output.
write.csv(
  all_posters_individual_no_ah_ksack,
  "../data/derived_data/posters_output_of_session_classification.csv"
)

write.csv(
  poster_session_durations_no_th_ev,
  "../data/derived_data/poster_session_durations_no_th_ev.csv"
)

We have from the SAA spaces for r sum(poster_session_durations_no_th_ev$n_max) posters. And we have r sum(poster_session_durations_no_th_ev$c_posters)

It's tricky to allocate the posters so they don't exceed the number allowed in each day part.

We may need to split up some of the general poster sessions to make it easier to pack them in. What is the distribution of session sizes? What if we chopped in half all the sessions bigger than 12 posters?

We have no clash with paper presenters.

What do we still need to do?

Colophon

This report was generated on r Sys.time() using the following computational environment and dependencies:

# Which R packages and versions were used? Report the details of the current
# session so the computational environment of this report is on record.
devtools::session_info()

The current Git commit details are:

# What commit is this file at? Open the Git repository located two
# directories above this document; the returned object is displayed as the
# commit details.
git2r::repository("../..")


benmarwick/confschedlr documentation built on May 20, 2019, 4:26 p.m.