In skranz/SeminarMatching: Shiny App for Seminar Assignment

# Report-wide knitr chunk defaults: errors do not abort rendering, the R
# source is hidden, and chunk results are not cached.
knitr::opts_chunk$set(
  error = TRUE,
  echo = FALSE,
  cache = FALSE
)
library(ggplot2)
library(SeminarMatching)
library(dplyr)
library(tidyr)
library(reshape2)

# Report configuration.
# NOTE(review): the working directory is a machine-specific absolute path;
# db.dir below is resolved relative to it.
setwd("D:/libraries/SeminarMatching/semapps/shared/reports")
semester <- "SS17"
db.dir <- "../db"
round <- 1

# Load every table the report needs for the selected semester.
# dbGet() and nlist() come from packages attached above; nlist(semester) is
# used as the filter argument of each query.
semdb <- dbConnect(
  drv = SQLite(),
  dbname = paste0(db.dir, "/semDB.sqlite")
)

matchings <- dbGet(semdb, "matchings", nlist(semester))
seminars <- dbGet(semdb, "seminars", nlist(semester))
students <- dbGet(semdb, "students", nlist(semester))

studpref <- dbGet(semdb, "studpref", nlist(semester))

manual <- dbGet(semdb, "manual", nlist(semester))

# Nothing below queries the database again, so release the connection here
# instead of leaking it until the R session ends.
invisible(dbDisconnect(semdb))

# Split the matchings by round. Round-2 rows are additionally restricted to
# students that ranked at least one seminar.
matchings1 <- matchings %>%
  filter(round == 1)

matchings2 <- matchings %>%
  filter(round == 2) %>%
  filter(num_ranked > 0)

# The remainder of the report works on the round-1 result.
matchings <- matchings1

# Analysis table: one row per matching of a student that ranked at least one
# seminar, labelled with the outcome (semid == -1 means "no seminar").
df <- matchings %>%
  filter(num_ranked > 0) %>%
  mutate(found = ifelse(semid != -1, "found seminar", "no seminar"))

# Attach seminar attributes; only columns not already in df are taken so the
# join does not create duplicated (.x/.y) columns.
cols <- c("semid", setdiff(colnames(seminars), colnames(df)))
df <- left_join(df, seminars[, cols, drop = FALSE], by = "semid")

# Attach student attributes in the same way.
cols <- c("userid", setdiff(colnames(students), colnames(df)))
df <- left_join(df, students[, cols, drop = FALSE], by = "userid")

# Headline figures used in the report text.
# semid == -1 is the marker for "no seminar assigned".
num.studs <- length(unique(df$userid))

# Count distinct seminars that received students. The -1 marker must be
# excluded, otherwise "no seminar" is counted as one additional seminar
# whenever at least one student stayed unmatched.
num.sems <- length(setdiff(df$semid, -1))

num.match <- sum(df$semid != -1)
num.empty <- sum(df$semid == -1)

# na.rm = TRUE matches the per-degree slot table further below, which sums
# the same column while ignoring missing slot counts.
total.slots <- sum(seminars$slots, na.rm = TRUE)

# Number of students assigned to each seminar in round 1
# (rows without a seminar, semid == -1, are dropped first).
r1fi <- df %>%
  filter(semid > -1) %>%
  group_by(semid) %>%
  summarise(filled_round1 = n())

# Seminars that received nobody get NA in filled_round1 after this join.
seminars <- seminars %>%
  left_join(r1fi, by = "semid")

# Computations for round 2 ----
sp1 <- studpref %>% filter(round == 1)
sp2 <- studpref %>% filter(round == 2)

# Students that were assigned a seminar in round 1.
found1 <- unique(filter(df, round == 1, semid > -1)$userid)

# Number of students that submitted preferences in round 2.
r2_num <- length(unique(sp2$userid))

# ... of which did not submit any preferences in round 1
# (setdiff() already de-duplicates its result).
r2_new <- length(setdiff(sp2$userid, sp1$userid))

# ... of which already hold a seminar from round 1
# (intersect() already de-duplicates its result).
r2_extra <- length(intersect(sp2$userid, found1))

# Manual adjustments ----
# Disabled alternative that would net out additions and removals per student:
# man <- manual %>%
#   group_by(userid) %>%
#   summarize(net_added = sum(added == 1) - sum(added == 0)) %>%
#   filter(net_added != 0)

# Rows with added == 1 are counted as additions, rows with added == 0 as
# removals.
n_man_added <- sum(manual$added == 1)
n_man_removed <- sum(manual$added == 0)

Matching Results for `r semester` round `r round`

Note: The report considers only students that have ranked at least 1 seminar.
We have `r num.studs` students and `r num.sems` seminars.
`r num.match` of `r total.slots` seminar slots have been filled (`r round(100*num.match / total.slots)`%) and `r num.empty` students (`r round(100*num.empty / num.studs)`%) did not get assigned a seminar.
On average a student listed `r round(mean(df$num_ranked),1)` seminars in the preference list.

Filled and open seminar slots after matching round 1:

# Per seminar level (semBAMA): number of seminars, total slots, slots filled
# in round 1 and slots still open. Missing values are ignored in both sums.
seminars %>%
  group_by(semBAMA) %>%
  summarise(
    num_sems = n(),
    total_slots = sum(slots, na.rm = TRUE),
    filled = sum(filled_round1, na.rm = TRUE),
    open = total_slots - filled
  )

The following table shows, for Bachelor and Master students, how many students did or did not find a seminar.

# Students per degree level (studBAMA), split by matching outcome, plus a
# row total.
sdf <- df %>%
  group_by(studBAMA, found) %>%
  summarise(n = n()) %>%
  ungroup() %>%
  spread(key = found, value = n) %>%
  replace.na(0) %>%
  as.data.frame()

# spread() only creates a column for an outcome that actually occurs. Add the
# missing one under its real label with zero counts; a generic fallback name
# would be wrong when the absent outcome is "found seminar".
if (!"found seminar" %in% colnames(sdf)) {
  sdf[["found seminar"]] <- rep(0, nrow(sdf))
}
if (!"no seminar" %in% colnames(sdf)) {
  sdf[["no seminar"]] <- rep(0, nrow(sdf))
}

# Keep a stable column order; any further columns stay behind these three.
sdf <- sdf[
  ,
  union(c("studBAMA", "found seminar", "no seminar"), colnames(sdf)),
  drop = FALSE
]

# Address the outcome columns by name rather than by position.
sdf$total <- sdf[["found seminar"]] + sdf[["no seminar"]]
sdf

Here are the shares of students that got the seminar that they ranked at n'th position...

# Share of students (within each degree level) that received the seminar
# they ranked at position `pos`. N is the number of rows per degree level
# and serves as the denominator.
df %>%
  filter(num_ranked > 0) %>%
  group_by(studBAMA) %>%
  mutate(N = n()) %>%
  group_by(studBAMA, pos, found) %>%
  summarize(
    share = paste0(round(100 * n() / first(N), 1), "%")
  ) %>%
  ungroup() %>%
  filter(found == "found seminar") %>%
  dcast(pos ~ studBAMA + found, value.var = "share") %>%
  mutate(pos = paste0("Ranked ", pos)) %>%
  replace.na("0%")

And now we show how the length of a student's preference list relates to the share of students that got assigned a seminar:

# Outcome shares by degree level and preference-list length. N is the number
# of rows per (studBAMA, num_ranked) cell and serves as the denominator.
df %>%
  group_by(studBAMA, num_ranked) %>%
  mutate(N = n()) %>%
  group_by(studBAMA, num_ranked, found) %>%
  summarize(
    share = paste0(round(100 * n() / first(N), 1), "%")
  ) %>%
  dcast(num_ranked ~ studBAMA + found, value.var = "share") %>%
  replace.na("0%")

Here is the number of students that ranked `num_ranked` seminars:

# Number of students per preference-list length, one column per degree
# level, plus the sum of the Bachelor and Master columns.
df %>%
  group_by(studBAMA, num_ranked) %>%
  summarize(number_students = n()) %>%
  dcast(num_ranked ~ studBAMA, value.var = "number_students") %>%
  replace.na(0) %>%
  mutate(
    "Total Number of Students" = Bachelor + Master
  )

Let us now compare for Bachelor students the seminar matching by their main subject:

# Bachelor students: matching outcome counts per main subject, together with
# the mean preference-list length of that subject; subjects with the most
# placed students come first.
df %>%
  filter(studBAMA == "Bachelor") %>%
  group_by(studSubject) %>%
  mutate(mean_num_sem_ranked = round(mean(num_ranked), 1)) %>%
  group_by(studSubject, mean_num_sem_ranked, found) %>%
  summarize(n = n()) %>%
  dcast(studSubject + mean_num_sem_ranked ~ found, value.var = "n") %>%
  arrange(desc(`found seminar`)) %>%
  replace.na(0)

and the same thing for master students...

# Master students: matching outcome counts per main subject, together with
# the mean preference-list length of that subject; subjects with the most
# placed students come first.
df %>%
  filter(studBAMA == "Master") %>%
  group_by(studSubject) %>%
  mutate(mean_num_sem_ranked = round(mean(num_ranked), 1)) %>%
  group_by(studSubject, mean_num_sem_ranked, found) %>%
  summarize(n = n()) %>%
  dcast(studSubject + mean_num_sem_ranked ~ found, value.var = "n") %>%
  arrange(desc(`found seminar`)) %>%
  replace.na(0)

Filled slots after all matching rounds and manual assignments

# Per seminar level (semBAMA): number of seminars, total slots, filled slots
# and open slots, based on the filled_slots column of the seminars table.
# Missing values are ignored in both sums, consistent with the round-1 slot
# table; otherwise a single NA would turn the whole row into NA.
seminars %>%
  group_by(semBAMA) %>%
  summarize(
    num_sems = n(),
    total_slots = sum(slots, na.rm = TRUE),
    filled = sum(filled_slots, na.rm = TRUE),
    open = total_slots - filled
  )

Matching round 2

`r r2_num` students have submitted preferences in round 2.
`r r2_num-r2_extra` of these students did not get a slot in round 1. They either forgot to participate in round 1 (`r r2_new` students) or submitted too short a preference list in round 1 and have now decided to accept other seminars as well (`r r2_num-r2_extra-r2_new` students). Yet, chances of getting a seat are much lower in round 2.
`r r2_extra` of these students already got a seminar in round 1, and probably want to take two seminars. In an ideal world without mistakes (like forgetting to participate in round 1), this would be the only reason to participate in round 2.
In total `r sum(matchings2$semid>=0)` of the `r r2_num` students got assigned a seminar slot in round 2.

Manually added or removed students

Manually, `r n_man_added` students have been added to seminars and `r n_man_removed` students have been removed from seminars.

Background:

- Because the world is not ideal, after the matching rounds, some students may drop out of the seminar, e.g. because they were dissatisfied with their topic, and are then removed from the seminar list.
- Other students are added, either as replacements for students who dropped out, or because of an expansion of slots due to high demand.
- Note that the numbers below would only be correct if all lecturers inserted all additions and removals in the database and if no student were added or removed multiple times.

skranz/SeminarMatching documentation built on June 9, 2020, 6:57 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

Tweet to @rdrrHQ

GitHub issue tracker

ian@mutexlabs.com