knitr::opts_chunk$set(
  collapse = TRUE,
  warning = FALSE,
  message = FALSE,
  echo = FALSE,
  comment = "#>",
  fig.path = "figures"
)

library(confschedlr)
library(readxl)
library(tidyverse)
library(knitr)
library(glue)

Introduction

How many sessions do we have?

library(readxl)
org_sessions <- read_excel("../data/raw_data/Organized Session Abstracts_2018_AB.xlsx")

gen_sessions <- read_excel("../data/raw_data/01 - gen_paper_sessions_FINAL(oct5)_DG_including_moves v2.xlsx")

gen_sessions <- 
gen_sessions %>% 
  mutate(`Session Id` = SESSION) %>% 
  mutate(`Session Id` =  ifelse(`Session Id` == "35B",
          max(as.numeric(`Session Id`), na.rm = TRUE) + 1, 
          as.numeric(`Session Id`) ))

poster_sessions <- 
  org_sessions %>% 
  filter(`Session Type` == "Poster Symposium")

# organised sessions without posters
org_sessions <- 
   org_sessions %>% 
  filter(`Session Type` != "Poster Symposium")

# how many unique organised sessions? 
n_org_sessions <- length(unique(org_sessions$`Session Id`))

# how many unique general sessions? 
n_gen_sessions <- length(unique(gen_sessions$`Session Id`))

# how many unique poster sessions? 
n_pos_sessions <- length(unique(poster_sessions$`Session Id`))

# total number of sessions
total_number_of_sessions <-  n_org_sessions + n_gen_sessions + n_pos_sessions

# combine organised and general sessions
org_sessions$`Session Id` <- as.numeric(org_sessions$`Session Id`)
gen_sessions$`Session Type` <- "Symposium"

# get only columns that are also in the organised sessions data frame
gen_sessions_to_join <- 
gen_sessions %>% 
  select(names(gen_sessions)[names(gen_sessions) %in% names(org_sessions)])

all_nonposter_sessions <- 
  bind_rows(gen_sessions_to_join, org_sessions)

We have this many sessions in each major category, according to my tally of the individual papers:

# table
tribble(            ~'Session type',             ~number,
                      'Organised sessions',        n_org_sessions,
                      'Poster sessions',           n_pos_sessions,
                      'General sessions',          n_gen_sessions)

With a total of r total_number_of_sessions sessions altogether, and a total number of non-poster sessions r total_number_of_sessions - n_pos_sessions.

For scheduling into rooms and time slots, we wont include the posters, because they go in their own schedule in the ballroom.

To help with preventing schedule clashes, we make a unique ID out of the first name and last name to track specific individuals.

all_nonposter_sessions$uid <- 
  with(all_nonposter_sessions, paste0(`First Name`, 
                                                 "_", 
                                                 `Last Name`))

How many 4 hour slots do we have available?

To find out how many 4 h slots we have available, I looked on the grid on 'Rooom&Time_DG.xlsx' and counted up the 4 slots. I did not count poster sessions here, because they are less than 4 h and they are outside of the room schedule. I rounded down a little:

Here's what I got for session slots in each day and part of day:

library(tidyverse)
four_h_slots <- 
  tribble(~part,       ~Th, ~Fr, ~Sa, ~Su,
          "morning",    36,  31,  30,  28,
          "afternoon",  35,  32,  30,  0,
          "evening",    36,  0,    0,  0)

(four_h_slots)
four_h_slots_long <- 
  four_h_slots %>% 
  gather(times, slot, -part)

# how many 4 h slots
n_four_h_slots <-  sum(four_h_slots_long$slot)

So that's a total of r n_four_h_slots 4 hour slots, in which we have to fit all the organised and general sessions. This does not include the posters.

What are the durations of the sessions?

We've received information from the SAA about the length of specific sessions, so let's use that:

saa_info_on_organised_session_duration <- 
  read_excel("../data/raw_data/saa all organizedsession_timeslotABv3.xls", sheet = 2) %>% 
  mutate(`Session Id` = `SESSION ID`)

computed_general_session_duration <- 
  gen_sessions %>% 
  group_by(`Session Id`) %>% 
  summarise(n_papers = n()) %>% 
  mutate(TIME = n_papers * 0.25) %>%  # 15 mins per paper
  mutate(`Session Id` =  ifelse(`Session Id` == "35B",
          max(as.numeric(`Session Id`), na.rm = TRUE) + 1, 
          as.numeric(`Session Id`) ))

# combine general and organised session durations
org_and_gen_session_durations <- 
  saa_info_on_organised_session_duration %>% 
  filter(!is.na(`Session Id`)) %>% 
  mutate(n_papers = `N papers`,
         TIME = as.numeric(TIME)) %>% 
  select(`Session Id`, TIME, n_papers) %>% 
  rbind(computed_general_session_duration) %>% 
  filter(!is.na(TIME)) %>% 
  # add 15 m min  to each session for a changeover time
  mutate(TIME = if_else(TIME == 4.0, 
                        TIME,
                        TIME + 0.25))

head(org_and_gen_session_durations)

# join the session duration onto each paper

all_nonposter_sessions_with_session_durations <- 
  all_nonposter_sessions %>% 
  mutate(`Session Id` = as.numeric(`Session Id`)) %>% 
  left_join(org_and_gen_session_durations) 

head(all_nonposter_sessions_with_session_durations)

How can we prevent one person scheduled in two simultaneous sessions?

Because we will have many parallel sessions, we need to take care that we don't put one person in two simultaneous sessions.

We start by taking a day-part, assign a session to it, and then looping over that day-part to add another session, and another, and so on. Each time we add a session to that day-part, we check to see if the names in the incoming session already exist in the sessions we previously added to this day-part. If a name has already been added to that day-part, then we skip adding the session, and move to the next session. We keep doing this until the tally of hours of all the sessions in the day-part matches the total number of hours we have for that day-part.

We can compute the total capacity in hours of each day-part like this:

four_h_slots_long$cumsum <- cumsum(four_h_slots_long$slot)

xx <- 
four_h_slots_long %>% 
  mutate(day_part = paste0(times, "-", part)) %>% 
  select(-part, -times) %>% 
  rename(end = cumsum) %>% 
  filter(slot != 0)

# get the number of the start slot to go with the number of the end slot
 xx$start <- 0
for(i in 1:(nrow(xx))) {
  xx$start[i] <- 
    if_else(i == 1, 
           1,
            as.numeric(xx[ i-1, 2]) + 1)
}
# each slot is 4 hours long
total_cap <- 
  xx %>% 
  mutate(total_capacity = slot * 4) %>% 
  select(day_part, slot, total_capacity)

# View
total_cap

Now we will take session from the big table of sessions and add it into, for example, Th-morning. We'll keep adding more sessions until we reach the capacity of that day-part.

But, we also need to loop over the sessions as we add more to the day-part to check to see if a person's name is already there or not. If the name is already there, we'll skip adding that session, and try the next session.

# prepare some variables... get all the sessions, 
# but we're going to modify this, so make a copy
org_and_gen_session_durations$session_time_allowed <- 
  as.numeric(org_and_gen_session_durations$TIME)
all_nonposter_sessions_with_session_durations$session_time_allowed <- 
  all_nonposter_sessions_with_session_durations$TIME
z7 <- # just the sessions IDs
  all_nonposter_sessions_with_session_durations %>% 
  group_by(`Session Id`) %>% 
  distinct(`Session Id`)

all_of_the_day_parts <- NULL
total_cap$hours_of_sessions <- 0
total_cap$n_sessions <- 0
total_cap$c_sessions <- 0
container_for_this_day_part_with_day_from_while <- 0
i <- 1

# start with each day-part...
for(i in seq_len(nrow(total_cap))) {

  # stop loop gracefully when we have assigned all people to day-parts
  if (nrow(z7) == 0){ break }

  # how many rows (people) to we have remaning to allocate to a day-part?
  print(glue('number of people remaining to classify is {nrow(z7)}'))

  # add some buffer time to nudge it back so we don't overfill a day-part
  # maybe if this buffer is bigger, the knapsack will be faster?
  # A big enough buffer should avoid the 0 group in the knapsack results
  # which indicates that the knapsack fn can't fit all the sessions in the
  # day-part
  buffer <- 30
  target_capacity <- total_cap$total_capacity[i] - buffer

  # prepare a while statement that will take a session, 
  # check to see if there is a name clash with sessions,
  # skip adding it if there is clash, then check to see
  # the cumulative duration of the sessions in the day-part 
  # has exceeded the total duration of the day-part,
  # and stop adding sessions if it has.

# make sure some variables are empty
cumulative_duration_of_sessions   <-  0
container_for_this_day_part <- 0
all_the_people_in_the_session <- 0
j <- 1

while(cumulative_duration_of_sessions <= target_capacity & 
      j <= nrow(z7)) {

    # randomly reorder the rows to see if we can solve a problem
    # z7 <- z7[sample(nrow(z7)),]
    get_a_session <- z7[j, ]

    # get the names of all the people in this session...
    all_the_people_in_this_session <- 
      all_nonposter_sessions_with_session_durations %>% 
      filter(`Session Id` == get_a_session$`Session Id`)

    # get names from what we already have added to this day-part
     if (j == 1) {
      # do nothing is this is the first incoming session 
       distinct_names_for_each_session_so_far <- 0
    } else {
      # get distinct names for each session previously added
      # to this day-part
      # (don't care about same name multiple times in one session)
      distinct_names_for_each_session_so_far <- 
      container_for_this_day_part %>% 
        group_by(uid) %>% 
        distinct(uid, .keep_all = TRUE) %>% 
        pull(uid)
    }

    # compare names already in here to names we are about to add
    if (j == 1) {
      # do nothing 
      names_already_in <- 0
      } else {
      # check to see if incoming session has names that already exist in this day_part
      names_already_in <- sum(all_the_people_in_this_session$uid %in% distinct_names_for_each_session_so_far)
      }

    # accumulate the incoming session onto the sessions
    # we've already added to this day-part, if there are no
    # name conflicts
    if (j == 1) {
       container_for_this_day_part <- 
        all_the_people_in_this_session
      } else {
      if (names_already_in == 0) {
      # if there are no name clashes, 
      # add these names to previous names in this day-part

      # bind rows of session that has no clashing  names with 
      # sessions we already added to this day-part
      container_for_this_day_part <- 
        bind_rows(all_the_people_in_this_session,
                  container_for_this_day_part)
    }  else {

        # do not add this session to the day-part
      }
    }

    # compute the duration of session that we've added so far to 
    # this day-part
    cumulative_duration_of_sessions <- 
      container_for_this_day_part %>% 
      distinct(`Session Id`, .keep_all = TRUE) %>% 
      pull(session_time_allowed) %>% 
      sum

    # store this result
    total_cap$hours_of_sessions[i] <- cumulative_duration_of_sessions

    # show a check of what's happening
    if (j == 1){
    progress_check <-    
      data_frame(i = i, 
                 j = j,
               'cumulative duration' = cumulative_duration_of_sessions,
               'sessions added' =  length(unique(container_for_this_day_part$`Session Id`)),
               people =  nrow(container_for_this_day_part)
                )
    } else {
      progress_check <- bind_rows(progress_check, 
                                  data_frame(i = i,
                                             j = j,
               'cumulative duration' = cumulative_duration_of_sessions,
               'sessions added' =  length(unique(container_for_this_day_part$`Session Id`)),
               people =  nrow(container_for_this_day_part)
                ))
    }

       print(progress_check)

       # move to next session
       j <- j + 1
    } 
# end of the while loop

 # add an identifier so we can see where we have assigned each session so far
 container_for_this_day_part_with_day_from_while <- 
   container_for_this_day_part %>% 
   mutate(day_part_from_while  = as.character(total_cap$day_part[i]))

 # bind rows to previous day-parts we might have done
  if (i == 1) {

  all_of_the_day_parts <- container_for_this_day_part_with_day_from_while

  } else {

    # check to see

  all_of_the_day_parts <- bind_rows(container_for_this_day_part_with_day_from_while, 
                                      all_of_the_day_parts)
  }

 # # progress check
 print(glue('i = {i}'))
 # 
 print(
 all_of_the_day_parts %>%
   group_by(day_part_from_while) %>%
   summarise(papers = n(),
             sessions = length(unique(`Session Id`))))

 total_cap$n_sessions[i] <- length(unique(container_for_this_day_part_with_day_from_while$`Session Id`))

 total_cap$c_sessions[i] <-  sum(total_cap$n_sessions)

 print(total_cap)

 # for the next day-part, we need to make sure
 # we don't add the same session twice to different day-parts
 # so let's update our big table of abstracts, and drop the session that 
 # we've already got in the day-part

 z7 <-  
   z7 %>% 
   filter(! `Session Id` %in% unique(container_for_this_day_part$`Session Id`))

}
# end of for loop

# out put the results
write.csv(all_of_the_day_parts, 
          "../data/derived_data/output_of_day_part_classification.csv")

write.csv(total_cap, "../data/derived_data/total_cap.csv")
# read them in, if we need to
all_of_the_day_parts <- readr::read_csv("../data/derived_data/output_of_day_part_classification.csv")

total_cap <- readr::read_csv("../data/derived_data/total_cap.csv")

Did we get all sessions allocated to a day-part?

sessions_we_missed <- 
  setdiff(unique(all_nonposter_sessions$`Session Id`),
          unique(all_of_the_day_parts$`Session Id`))

We missed r length(sessions_we_missed) sessions: r sessions_we_missed.

What did we get from this process? The table below shows each day-part, the number of hours that we have available to fill (total_capacity) and the number of hours that we have filled so far (hours_of_sessions):

total_cap %>% 
  rename(`Number of sessions` = n_sessions,
         `Cumulative number of sessions` = c_sessions)

Have we succeeded in avoiding placing one person in two sessions scheduled in the same day-part?

Let's see if there are any names that occur in two or more distinct sessions in the same day-part.

# check to see what we got from the loop...
sessions_per_day <- 
all_of_the_day_parts %>% 
  group_by(day_part_from_while) %>% 
  summarise(n_sessions = length(unique(`Session Id`))) 

hours_per_day <- 
  all_of_the_day_parts %>% 
  group_by(day_part_from_while, `Session Id`) %>% 
  slice(1) %>% 
  summarise(n_hours = sum(session_time_allowed)) %>% 
  ungroup() %>% 
  group_by(day_part_from_while) %>% 
  summarise(n_hours = sum(n_hours)) 

n_missing_papers <- 
  nrow(all_nonposter_sessions_with_session_durations) - nrow(all_of_the_day_parts)

n_missing_sessions <- 
  length(unique(all_nonposter_sessions_with_session_durations$`Session Id`)) - length(unique(all_of_the_day_parts$`Session Id`))  

missing_sessions <- 
  unique(all_nonposter_sessions_with_session_durations$`Session Id`)[!(unique(all_nonposter_sessions_with_session_durations$`Session Id`) %in% unique(all_of_the_day_parts$`Session Id`)) ]

# have we succeeded with no name clashes?
# in each day-part, do any names occur more than once?

# how to get it? testing...
ex <- 
tribble(~'day_part', ~'sess', ~'auth',
        "th-m",       1,        'al', 
        "th-a",       2,        'be',
        "th-a",       3,        'be',
        "th-e",       4,        'ce',
        "fr-a",       5,        'de',
        "fr-a",       5,        'de')

out <- 
ex %>% 
  group_by(day_part, auth) %>% 
  filter(n() > 1) %>% 
  group_by(sess) %>% 
  filter(n() == 1) 

# deploy...
clash <- 
all_of_the_day_parts %>% 
  # get authors that have multiple rows (roles or sessions) day-part
  group_by(day_part_from_while, uid) %>% 
  filter(n() > 1  ) %>% 
  # get rows where 
  group_by(`Session Id`) %>% 
  filter(n() == 1) # get rows where there are names that occur in more than one session in each day-part

Currently we have r nrow(clash) schedule clashes, that is, people who are scheduled to be in two or more sessions that are scheduled in the same day-part. That's r ifelse(nrow(clash) == 0, "great", "not ideal").

Organise the sessions in each day-part into 4 hour slots

Now that we have allocated all the organised sessions into day-parts, we need to group them into 4 hour slots so we can assign them to specific rooms.

We can use the mulitple knapsack algorithm for this. For example, Thurs morning has 36 slots of 4 hours each.

We have a series of functions, one for each day-part, then we run them, and write the output to CSV.

# thu-morning

thu_mo <- function(){

# get the slots available for this day-part
thu_morning_slots <- 
  total_cap %>% 
  filter(day_part == 'Th-morning') %>% 
  pull(slot)

# get the session id's assigned to this day-part
thu_morning_sessions <- 
  all_of_the_day_parts %>% 
  group_by(`Session Id`) %>% 
  filter(day_part_from_while == 'Th-morning') %>% 
  slice(1)

slots_to_fill <- rep(4, thu_morning_slots) * 60 # so many 4 hour slots that we need to fill

sessions_we_have <- thu_morning_sessions$session_time_allowed * 60

library(adagio)
# mknapsack calling signature is: mknapsack(values, weights, capacities)
solution <- mknapsack(sessions_we_have, 
                      sessions_we_have,
                      slots_to_fill)

thu_morning_sessions$computed_slot <- solution$ksack

write.csv(thu_morning_sessions,
          "../data/derived_data/thu_morning_sessions.csv")
}

# Here's thursday afternoon:

# thu-afternoon
thu_af <- function(){

# get the slots available for this day-part
thu_afternoon_slots <- 
  total_cap %>% 
  filter(day_part == 'Th-afternoon') %>% 
  pull(slot)

# get the session id's assigned to this day-part
thu_afternoon_sessions <- 
  all_of_the_day_parts %>% 
  group_by(`Session Id`) %>% 
  filter(day_part_from_while == 'Th-afternoon') %>% 
  slice(1)

slots_to_fill <- rep(4, thu_afternoon_slots) * 60 # so many 4 hour slots that we need to fill

sessions_we_have <- thu_afternoon_sessions$session_time_allowed * 60

library(adagio)
# mknapsack calling signature is: mknapsack(values, weights, capacities)
solution <- mknapsack(sessions_we_have, 
                      sessions_we_have,
                      slots_to_fill)

thu_afternoon_sessions$computed_slot <- solution$ksack

write.csv(thu_afternoon_sessions, 
          "../data/derived_data/thu_afternoon_sessions.csv")
}

# Here's thursday evening:

# thu-evening

thu_ev <- function(){

# get the slots available for this day-part
thu_evening_slots <- 
  total_cap %>% 
  filter(day_part == 'Th-evening') %>% 
  pull(slot)

# get the session id's assigned to this day-part
thu_evening_sessions <- 
  all_of_the_day_parts %>% 
  group_by(`Session Id`) %>% 
  filter(day_part_from_while == 'Th-evening') %>% 
  slice(1)

slots_to_fill <- rep(4, thu_evening_slots) * 60 # so many 4 hour slots that we need to fill

sessions_we_have <- thu_evening_sessions$session_time_allowed * 60

library(adagio)
# mknapsack calling signature is: mknapsack(values, weights, capacities)
solution <- mknapsack(sessions_we_have, 
                      sessions_we_have,
                      slots_to_fill)

thu_evening_sessions$computed_slot <- solution$ksack

write.csv(thu_evening_sessions, 
          "../data/derived_data/thu_evening_sessions.csv")
}


# Here's friday morning:

# fri-morning

fri_mo <- function(){

# get the slots available for this day-part
fri_morning_slots <- 
  total_cap %>% 
  filter(day_part == 'Fr-morning') %>% 
  pull(slot)

# get the session id's assigned to this day-part
fri_morning_sessions <- 
  all_of_the_day_parts %>% 
  group_by(`Session Id`) %>% 
  filter(day_part_from_while == 'Fr-morning') %>% 
  slice(1)

slots_to_fill <- rep(4, fri_morning_slots) * 60 # so many 4 hour slots that we need to fill

sessions_we_have <- fri_morning_sessions$session_time_allowed * 60

library(adagio)
# mknapsack calling signature is: mknapsack(values, weights, capacities)
solution <- mknapsack(sessions_we_have, 
                      sessions_we_have,
                      slots_to_fill)

fri_morning_sessions$computed_slot <- solution$ksack

write.csv(fri_morning_sessions, 
          "../data/derived_data/fri_morning_sessions.csv")
}

# Here's friday afternoon:

# fri-afternoon

fri_af <- function(){

# get the slots available for this day-part
fri_afternoon_slots <- 
  total_cap %>% 
  filter(day_part == 'Fr-afternoon') %>% 
  pull(slot)

# get the session id's assigned to this day-part
fri_afternoon_sessions <- 
  all_of_the_day_parts %>% 
  group_by(`Session Id`) %>% 
  filter(day_part_from_while == 'Fr-afternoon') %>% 
  slice(1)

slots_to_fill <- rep(4, fri_afternoon_slots) * 60 # so many 4 hour slots that we need to fill

sessions_we_have <- fri_afternoon_sessions$session_time_allowed * 60

library(adagio)
# mknapsack calling signature is: mknapsack(values, weights, capacities)
solution <- mknapsack(sessions_we_have, 
                      sessions_we_have,
                      slots_to_fill)

fri_afternoon_sessions$computed_slot <- solution$ksack

write.csv(fri_afternoon_sessions, 
          "../data/derived_data/fri_afternoon_sessions.csv")
}

# Here's Saturday morning:

# sat-morning
sat_mo <- function(){

# get the slots available for this day-part
sat_morning_slots <- 
  total_cap %>% 
  filter(day_part == 'Sa-morning') %>% 
  pull(slot)

# get the session id's assigned to this day-part
sat_morning_sessions <- 
  all_of_the_day_parts %>% 
  group_by(`Session Id`) %>% 
  filter(day_part_from_while == 'Sa-morning') %>% 
  slice(1)

slots_to_fill <- rep(4, sat_morning_slots) * 60 # so many 4 hour slots that we need to fill

sessions_we_have <- sat_morning_sessions$session_time_allowed * 60

library(adagio)
# mknapsack calling signature is: mknapsack(values, weights, capacities)
solution <- mknapsack(sessions_we_have, 
                      sessions_we_have,
                      slots_to_fill)

sat_morning_sessions$computed_slot <- solution$ksack

write.csv(sat_morning_sessions, 
          "../data/derived_data/sat_morning_sessions.csv")
}

# Here's saturday afternoon:

# sat-afternoon

sat_af <- function(){

# get the slots available for this day-part
sat_afternoon_slots <- 
  total_cap %>% 
  filter(day_part == 'Sa-afternoon') %>% 
  pull(slot)

# get the session id's assigned to this day-part
sat_afternoon_sessions <- 
  all_of_the_day_parts %>% 
  group_by(`Session Id`) %>% 
  filter(day_part_from_while == 'Sa-afternoon') %>% 
  slice(1)

slots_to_fill <- rep(4, sat_afternoon_slots) * 60 # so many 4 hour slots that we need to fill

sessions_we_have <- sat_afternoon_sessions$session_time_allowed * 60

library(adagio)
# mknapsack calling signature is: mknapsack(values, weights, capacities)
solution <- mknapsack(sessions_we_have, 
                      sessions_we_have,
                      slots_to_fill)

sat_afternoon_sessions$computed_slot <- solution$ksack

write.csv(sat_afternoon_sessions, 
          "../data/derived_data/sat_afternoon_sessions.csv")
}

# Here's sunday morning:

# sun-morning
sun_mo <- function(){

# get the slots available for this day-part
sun_morning_slots <- 
  total_cap %>% 
  filter(day_part == 'Su-morning') %>% 
  pull(slot)

# get the session id's assigned to this day-part
sun_morning_sessions <- 
  all_of_the_day_parts %>% 
  group_by(`Session Id`) %>% 
  filter(day_part_from_while == 'Su-morning') %>% 
  slice(1)

slots_to_fill <- rep(4, sun_morning_slots) * 60 # so many 4 hour slots that we need to fill

sessions_we_have <- sun_morning_sessions$session_time_allowed * 60

library(adagio)
# mknapsack calling signature is: mknapsack(values, weights, capacities)
solution <- mknapsack(sessions_we_have, 
                      sessions_we_have,
                      slots_to_fill)

sun_morning_sessions$computed_slot <- solution$ksack

write.csv(sun_morning_sessions, 
          "../data/derived_data/sun_morning_sessions.csv")
}
# run all of these
thu_mo()
thu_af()
thu_ev()
fri_mo()
fri_af()
sat_mo()
sat_af()
sun_mo()

# why do these sometime have a 0 group? Because the fn can't fit it all in! So we need to add more buffer to each day-part in the while loop

Resolving conflicts with requested times

Now let's get all the names of the people in each day-part to check for the special requests that a few people have made:

# read in the day-part data from the while-loop
sessions_in_day_parts <- list.files("../data/derived_data", pattern = "_sessions.csv", full.names = TRUE)
sessions_in_day_parts_data <- 
  map(sessions_in_day_parts, 
       ~read_csv(.x))

# for each day-part, join with the all names of the people in each session
sessions_in_day_parts_data_with_all_names <- 
  map(sessions_in_day_parts_data,
      ~.x %>% 
        left_join(all_of_the_day_parts, by = 'Session Id')
      )

# get day-parts to use in file names
day_parts_of_list <- 
  map_chr(sessions_in_day_parts_data_with_all_names, 
    ~unique(.x$day_part_from_while.y))

# write these out to CSV
walk2(sessions_in_day_parts_data_with_all_names, 
     day_parts_of_list,
     ~write_csv(.x,  path = glue("../data/derived_data/{.y}-all-names.csv")))

# any computer slots with 0?
sessions_in_day_parts_data_with_all_names_df <- 
bind_rows(sessions_in_day_parts_data_with_all_names)

sessions_in_day_parts_data_with_all_names_df %>% 
  filter(computed_slot == 0)

Sat morning is a focus of many of the special requests, let's have a look at that

Sa_morning_names <- sessions_in_day_parts_data_with_all_names[[which(day_parts_of_list == "Sa-morning")]]

# read in the sheet on conflicts
conflicts <- read_excel("../data/raw_data/Conflicts.xlsx")
# make a unique name variable
conflicts$uid <- glue('{conflicts$PRENOM}_{conflicts$NOM}')
# get the ones for Sa-morning
Sa_morning_conflicts <- 
  conflicts %>% 
  filter(grepl("Sa", CONFLICTS))

# check to see if we have a problem
Sa_morning_conflict_result <- Sa_morning_names$uid.y[Sa_morning_names$uid.y %in% Sa_morning_conflicts$uid]

# double-check
double_check <- 
Sa_morning_names[Sa_morning_names$`Last Name.y` %in% Sa_morning_conflicts$NOM, ]
# it's ok

We have r length(Sa_morning_conflict_result) conflicts on Saturday morning, that's r ifelse(length(Sa_morning_conflict_result) == 0, "great", "not ideal")

What about conflicts in other day-parts?

other_conflicts <- 
  conflicts %>% 
  filter(!grepl("Sa", CONFLICTS))

There are just r nrow(other_conflicts), so let's do it one by one.

  1. Deborah Nichols doesn't want to be in Thurs AM, Fri AM. When do we have her?
sessions_in_day_parts_data_with_all_names_df %>% 
  filter(uid.y == "Deborah_Nichols") %>% 
  select(`Session Id`, uid.y, day_part_from_while.y )

Sa-afternoon and Sa-morning, she's ok, great!

  1. Leah McCurdy and session 3109, but what is the conflict exactly? Not on Sat morning from 9-11 am.
Sa_morning_names %>% 
  filter(uid.y == "Leah_McCurdy")

# when is she?
sessions_in_day_parts_data_with_all_names_df %>% 
  filter(uid.y == "Leah_McCurdy") %>% 
  select(day_part_from_while.y)

She's ok, on Fr-morning

  1. Session 2929 should not be on "Thurs PM, Thurs EVE, Fri PM, SAT AM, SAT PM". When do we have it?
sessions_in_day_parts_data_with_all_names_df %>% 
  filter(`Session Id` == 2929) %>% 
  select(`Session Id`, uid.y, day_part_from_while.y )

This is a problem, we put this session in Th-afternoon.

Can we tack it onto Su-morning, right at the end? According to the results of our while-loop, we have many hours spare on Su-morning:

total_cap

Let's double-check what we've got after the multiple knapsack work:

day_parts_filled_hours <- 
sessions_in_day_parts_data_with_all_names_df %>% 
  group_by(`Session Id`) %>% 
  slice(1) %>% 
  group_by(day_part_from_while.y) %>% 
  summarise(hours_in_day_part = sum(session_time_allowed.y ))

It's the same. So how many spare hours do we have on Su-morning?

Su_morning_hours_available <- 
total_cap %>% 
  filter(day_part == "Su-morning") %>% 
  select(total_capacity) %>% 
  pull()

Su_morning_hours_filled <- 
  day_parts_filled_hours %>% 
  filter(day_part_from_while.y == "Su-morning") %>% 
  select(hours_in_day_part) %>% 
  pull()

Su_morning_spare_hours <- 
  Su_morning_hours_available - Su_morning_hours_filled

We have r Su_morning_spare_hours hours unused on Sunday morning. Heaps of space. So let's move Session 2929 to Sunday morning. Don't forget to update the computed slot value so it gets it's own slot.

# what is the max computed_slot for Su-morning?
max_computed_slot_Su_morning <- 
sessions_in_day_parts_data_with_all_names_df %>% 
  filter(day_part_from_while.y == "Su-morning") %>% 
  summarise(max_computed_slot_Su_morning = max(computed_slot)) %>% 
  pull(max_computed_slot_Su_morning)

sessions_in_day_parts_data_with_all_names_df <- 
sessions_in_day_parts_data_with_all_names_df %>% 
  mutate(day_part_from_while.y = if_else(`Session Id` == 2929, 
                                         "Su-morning", 
                                         day_part_from_while.y)) %>% 
  mutate(computed_slot = ifelse(`Session Id` == 2929, 
                                         max_computed_slot_Su_morning + 1, 
                                         computed_slot))

# check it
check_session_2929_to_sunday_morning <- 
sessions_in_day_parts_data_with_all_names_df %>% 
  filter(`Session Id` == 2929) %>% 
  select(day_part_from_while.y) %>% 
  unique()

# check to see if there are conflicts with names, should be 0
sessions_in_day_parts_data_with_all_names_df %>% 
  filter(day_part_from_while.y == "Su-morning") %>%
  group_by(`Session Id`, uid.y)  %>% 
  add_tally() %>% 
  distinct(`Session Id`) %>% 
  group_by(uid.y) %>% 
  filter(n() > 1 )

# how many slots in Sun?

# available?
total_cap #28 

# used?
sessions_in_day_parts_data_with_all_names_df %>% 
  filter(day_part_from_while.y == "Su-morning") %>% 
  distinct(computed_slot) %>% 
  max()
# 21

What do we stil need to do?

  • row 10 has session 2929 sharing with session 3165. But session 2929 needs 16 slots, which is 4 hours.

Problem was that I forgot to update the computed slot value. Fixed now.

  • Missing 3115, 3147, 3175, 3116, 3152, 3162
all_nonposter_sessions %>% 
  filter(`Session Id` %in% c("3115", "3147", "3175", "3116", "3152", "3162")) 
# yes, there

# how many sessions""
length(unique(all_nonposter_sessions$`Session Id`)) #  271

all_nonposter_sessions_with_session_durations  %>% 
  filter(`Session Id` %in% c("3115", "3147", "3175", "3116", "3152", "3162")) 
# yes

org_and_gen_session_durations  %>% 
  filter(`Session Id` %in% c("3115", "3147", "3175", "3116", "3152", "3162")) 
# yes

all_of_the_day_parts  %>% 
  filter(`Session Id` %in% c("3115", "3147", "3175", "3116", "3152", "3162")) 
# not there

sessions_in_day_parts_data_with_all_names_df %>% 
  filter(`Session Id` %in% c("3115", "3147", "3175", "3116", "3152", "3162"))
# not there

# how many sessions?
length(unique(sessions_in_day_parts_data_with_all_names_df$`Session Id`)) # 265


# are these sessions very long?
hours_of_missing_sessions <- 
all_nonposter_sessions %>% 
  filter(`Session Id` %in% c("3115", "3147", "3175", "3116", "3152", "3162"))  %>% 
  group_by(`Session Id`) %>% 
  tally() %>% 
  mutate(minutes = n * 15) %>% 
  mutate(hours = minutes / 60)
# nope
total_hours_missing <-  sum(hours_of_missing_sessions$hours)

# what are they about?
all_nonposter_sessions %>% 
  filter(`Session Id` %in% missing_sessions) %>% 
  select(Abstract, Title)
# no title or abstracts!
# So, no idea what's going on with these. Let's see if there are any problems with putting them on Sunday
missing_sessions_details <- 
  all_nonposter_sessions %>% 
  filter(`Session Id` %in% c("3115", "3147", "3175", "3116", "3152", "3162")) %>% 
  mutate(uid.y = uid)

# are any people in more than one session Sunday? yes
# what about all the day-parts with spare hours
clashes_on_day_x <- function(day) {
  sessions_in_day_parts_data_with_all_names_df %>% 
 filter(day_part_from_while.y == day) %>% 
  bind_rows(missing_sessions_details) %>% 
  group_by(`Session Id`, uid.y)  %>% 
  add_tally() %>% 
  distinct(`Session Id`) %>% 
  group_by(uid.y) %>% 
  filter(n() > 1 )
}

total_cap_remaining <- 
  total_cap %>% 
  mutate(hours_unused =  total_capacity - hours_of_sessions) %>% 
  select(day_part, hours_unused)


day_with_spare_hours <- c("Fr-morning",
                          "Fr-afternoon",
                          "Sa-morning",
                          "Sa-afternoon",
                          "Su-morning")

total_cap_remaining %>% 
  mutate(clashes = map(day_part, clashes_on_day_x)) %>% 
  mutate(n_clashes = map_int(.$clashes, ~nrow(.x)))

# Looks like Th-morning has no clashes, double-check, should be 0

 sessions_in_day_parts_data_with_all_names_df %>% 
  filter(day_part_from_while.y ==  "Th-morning") %>% 
  bind_rows(missing_sessions_details) %>% 
  group_by(`Session Id`, uid.y)  %>% 
  add_tally() %>% 
  distinct(`Session Id`) %>% 
  group_by(uid.y) %>% 
  filter(n() > 1 )

Seems that Th-morning is a good place to put these missing sessions. So let's do that. We'll have to double-up in some slots

# find out how many hours we have for each slot on th-morn
slot_on_thu_morn_with_less_than_4_h <- 
sessions_in_day_parts_data_with_all_names_df %>% 
  filter(day_part_from_while.y == "Th-morning") %>% 
  distinct(`Session Id`, .keep_all = TRUE) %>% 
  group_by(computed_slot) %>% 
  summarise(total_duration = sum(session_time_allowed.y)) %>% 
  filter(total_duration < 4) %>% 
  mutate(minutes = total_duration * 60) %>% 
  mutate(minutes_free = (4*60) - minutes)

# how many hours do we need to put in there?
minutes_of_missing_sessions <- 
  hours_of_missing_sessions$minutes

# # what is the max computed_slot for Su-morning?
# max_computed_slot_Th_morning <- 
# sessions_in_day_parts_data_with_all_names_df %>% 
#   filter(day_part_from_while.y == "Th-morning") %>% 
#   summarise(max_computed_slot_Th_morning = max(computed_slot)) %>% 
#   pull(max_computed_slot_Th_morning)
# 
# # assign missing sessions to Th morn, update slots
# missing_sessions_details <- 
# missing_sessions_details %>% 
#   mutate(day_part_from_while.y = "Th-morning")
# 
# missing_sessions_details_indices  <- 
# missing_sessions_details %>% 
#   group_by(`Session Id`) %>% 
#   group_indices()
# 
# # update computed slot
# missing_sessions_details <- 
#   missing_sessions_details %>% 
#     mutate(computed_slot = missing_sessions_details_indices + max_computed_slot_Th_morning) 

We can't put them all on Th morn, there's not slot with enough free space to fit more than one of the missing sessions in

# # bind to rest of the papers
# sessions_in_day_parts_data_with_all_names_df <- 
#   bind_rows(sessions_in_day_parts_data_with_all_names_df ,
#             missing_sessions_details)
#             
# 
# # check it, should be Th-morning
# check_missing_sessions_to_thu_morning <- 
# sessions_in_day_parts_data_with_all_names_df %>% 
#   filter(`Session Id` %in% missing_sessions) %>% 
#   select(day_part_from_while.y) %>% 
#   unique()
# 
# # check for conflicts with names
#  sessions_in_day_parts_data_with_all_names_df %>% 
#   filter(day_part_from_while.y ==  "Th-morning") %>% 
#   group_by(`Session Id`, uid.y)  %>% 
#   add_tally() %>% 
#   distinct(`Session Id`) %>% 
#   group_by(uid.y) %>% 
#   filter(n() > 1 )

Summary

Now we've got all the organised sessions scheduled into slots and day-parts with no clashes, and no conflicts with requested times.

# this is our final result
write_csv(sessions_in_day_parts_data_with_all_names_df,
          "../data/derived_data/sessions_in_day_parts_data_with_all_names_df.csv")

# can we make a visual schedule?
x <- 
sessions_in_day_parts_data_with_all_names_df %>% 
  group_by(day_part_from_while.y, computed_slot) %>% 
  summarise(sessions_for_plot = toString(unique(`Session Id`)))

# put day-slots in order
x$day_part_from_while.y <- factor(x$day_part_from_while.y, 
                                  levels = total_cap$day_part)

ggplot(x,
       aes(day_part_from_while.y,
           computed_slot)) +
 geom_tile(colour = "black",
           fill = "white") +
  geom_text(aes(label = sessions_for_plot), 
                  size = 3) +
    scale_y_continuous(breaks = 0:max(x$computed_slot)) +
  theme_bw() +
  xlab("") +
  ylab("Room-ish") +
  ggtitle("Visual schedule for SAA2019 conference non-poster sessions",
          subtitle = "The numbers on the plot are the Session Ids. \n We can think of  each row as equivalent to a room at the conference venue. ")

ggsave("../figures/visual_schedule_for_saa2019_conference_non_poster_sessions.png")

# make a spreadsheet that looks like this plot

library(tidyr)
visual_schedule_plot_as_spreadsheet <- 
  x %>% 
  select(day_part_from_while.y, 
         sessions_for_plot, 
         computed_slot) %>% 
  distinct(sessions_for_plot, 
           .keep_all = TRUE) %>% 
  spread(day_part_from_while.y, 
         sessions_for_plot, 
         fill = "") 

write_csv(visual_schedule_plot_as_spreadsheet,
          "../data/derived_data/visual_schedule_plot_as_spreadsheet.csv")

Colophon

This report was generated on r Sys.time() using the following computational environment and dependencies:

# which R packages and versions?
devtools::session_info()

The current Git commit details are:

# what commit is this file at? 
git2r::repository("../..")


benmarwick/confschedlr documentation built on May 20, 2019, 4:26 p.m.