# Chunk defaults for the whole report.
knitr::opts_chunk$set(cache = FALSE, echo = FALSE, error = TRUE)

# Semester whose matching is summarised in this report.
semester <- "SS15"

# Open the SQLite database semDB.sqlite inside db.dir.
db.dir <- "../db"
semdb <- dbConnect(drv = SQLite(), dbname = file.path(db.dir, "semDB.sqlite"))

# Fetch the three tables used below, passing the semester as lookup parameter.
# NOTE(review): semdb is not closed anywhere in the visible code -- confirm
# whether later code still needs the connection.
matchings <- dbGet(semdb, "matchings", nlist(semester))
seminars <- dbGet(semdb, "seminars", nlist(semester))
students <- dbGet(semdb, "students", nlist(semester))
# Start from the matchings table and label every row by match outcome.
# A semid of -1 is the sentinel for "no seminar" throughout this block.
df <- mutate(
  matchings,
  found = ifelse(semid != -1, "found seminar", "no seminar")
)

# Add the seminar columns that df does not have yet (joined on semid).
cols <- c("semid", setdiff(colnames(seminars), colnames(df)))
df <- left_join(df, seminars[, cols, drop = FALSE], by = "semid")

# Add the student columns that df does not have yet (joined on userid).
cols <- c("userid", setdiff(colnames(students), colnames(df)))
df <- left_join(df, students[, cols, drop = FALSE], by = "userid")

# Headline numbers.
num.studs <- length(unique(df$userid))
# Exclude the -1 sentinel so that "no seminar" is not counted as a seminar.
num.sems <- length(setdiff(unique(df$semid), -1))
num.match <- sum(df$semid != -1)
num.empty <- sum(df$semid == -1)
total.slots <- sum(seminars$slots)
# Number of rows per study programme (studBAMA) and match outcome (found),
# with the outcomes spread into columns: one row per programme.
# fill = 0L turns a missing programme/outcome combination into a count of 0
# instead of NA, so the row total below stays defined.
sdf <- df %>%
  group_by(studBAMA, found) %>%
  summarise(n = n()) %>%
  ungroup() %>%
  spread(key = found, value = n, fill = 0L)

# Row total over all outcome columns. Summing by name rather than by fixed
# positions 2 and 3 also works when only one outcome occurs in the data.
sdf$total <- rowSums(sdf[, setdiff(colnames(sdf), "studBAMA"), drop = FALSE])

Here are the shares of Bachelor students who got the seminar that they ranked at the n-th position...

# Share of Bachelor rows per rank position (pos) and match outcome (found),
# formatted as percentage strings, with one column per rank position.
sdf <- df %>%
  filter(studBAMA == "Bachelor") %>%
  # N is the number of Bachelor rows, used as denominator of every share.
  mutate(N = n()) %>%
  group_by(studBAMA, pos, found) %>%
  summarise(share = paste0(round(100 * n() / first(N), 1), "%")) %>%
  ungroup() %>%
  # Use a factor whose levels follow the sorted pos values; with a plain
  # character key spread() orders the columns alphabetically, which puts
  # "Ranked 10" before "Ranked 2".
  mutate(pos = factor(
    paste0("Ranked ", pos),
    levels = paste0("Ranked ", sort(unique(pos), na.last = TRUE))
  )) %>%
  spread(key = pos, value = share)


And here are the same shares for Master students...

# Share of Master rows per rank position (pos) and match outcome (found),
# formatted as percentage strings, with one column per rank position.
sdf <- df %>%
  filter(studBAMA == "Master") %>%
  # N is the number of Master rows, used as denominator of every share.
  mutate(N = n()) %>%
  group_by(studBAMA, pos, found) %>%
  summarise(share = paste0(round(100 * n() / first(N), 1), "%")) %>%
  ungroup() %>%
  # Use a factor whose levels follow the sorted pos values; with a plain
  # character key spread() orders the columns alphabetically, which puts
  # "Ranked 10" before "Ranked 2".
  mutate(pos = factor(
    paste0("Ranked ", pos),
    levels = paste0("Ranked ", sort(unique(pos), na.last = TRUE))
  )) %>%
  spread(key = pos, value = share)


In an ideal world for students, every student gets their first-ranked seminar. The rows where `found` equals "no seminar" describe the share of students who did not get a seminar and who ranked one seminar fewer than the rank column indicates (i.e. the option "no seminar" is ranked at the position given by the rank column). If students have ranked only very few seminars, it may not be surprising that they did not get a seminar. It is more worrying if students who have ranked a lot of seminars still do not get a seminar.

Let us show it graphically

# Share (in percent) of each rank position / match outcome combination,
# computed separately within each study programme (studBAMA).
sdf <- df %>%
  group_by(studBAMA) %>%
  # N is the number of rows of the programme, the denominator of its shares.
  mutate(N = n()) %>%
  group_by(studBAMA, pos, found) %>%
  summarise(share = round(100 * n() / first(N), 1)) %>%
  ungroup()

# One panel per programme: the shares are relative to each programme, so
# stacking both programmes in a single bar would add up to 200% in total.
ggplot(sdf, aes(x = pos, y = share, fill = found)) +
  geom_bar(stat = "identity") +
  facet_wrap(~studBAMA) +
  ggtitle("Share of students that got their n'th ranked seminar (or no seminar)")

And now we show how the length of a student's preference list relates to the share of students that got assigned a seminar:

# Share (in percent) of each match outcome among the rows that have the same
# study programme (studBAMA) and the same num_ranked value.
sdf <- df %>%
  group_by(studBAMA, num_ranked) %>%
  # N is the size of the programme / num_ranked cell, the denominator.
  mutate(N = n()) %>%
  group_by(studBAMA, num_ranked, found) %>%
  summarise(share = round(100 * n() / first(N), 1)) %>%
  ungroup()

# Put num_ranked on the x axis and use one panel per programme, so every bar
# shows the outcome split of one cell (summing to about 100%). Plotting only
# x = found would stack the shares of all cells on top of each other.
ggplot(sdf, aes(x = num_ranked, y = share, fill = found)) +
  geom_bar(stat = "identity") +
  facet_wrap(~studBAMA) +
  ggtitle("Share of students that got seminar depending on length of preference list")

Let us now graphically compare for Bachelor students the seminar matching by their main subject:

# Number of Bachelor rows per subject (studSubject) and match outcome (found).
sdf <- df %>%
  filter(studBAMA == "Bachelor") %>%
  group_by(studSubject, found) %>%
  summarise(n = n()) %>%
  ungroup()

# One panel per subject; without the facet the counts of all subjects would
# be stacked into a single bar per outcome and the subject split would be lost.
ggplot(sdf, aes(x = found, y = n, fill = found)) +
  geom_bar(stat = "identity") +
  facet_wrap(~studSubject) +
  ggtitle("Bachelor: Assigned Seminars by first Subject")

And the same thing for Master students...

# Number of Master rows per subject (studSubject) and match outcome (found).
sdf <- df %>%
  filter(studBAMA == "Master") %>%
  group_by(studSubject, found) %>%
  summarise(n = n()) %>%
  ungroup()

# One panel per subject; without the facet the counts of all subjects would
# be stacked into a single bar per outcome and the subject split would be lost.
ggplot(sdf, aes(x = found, y = n, fill = found)) +
  geom_bar(stat = "identity") +
  facet_wrap(~studSubject) +
  ggtitle("Master: Assigned Seminars by first subject")

