# Report-wide knitr chunk defaults: error = TRUE, echo = FALSE (chunk source
# is not shown in the report) and cache = FALSE (no cached chunk results).
knitr::opts_chunk$set(error = TRUE, echo=FALSE, cache=FALSE)
library(ggplot2)
library(SeminarMatching)
library(dplyr)
library(tidyr)
library(reshape2)
# ---- Report parameters ----
# Semester and matching round this report is generated for.
semester = "SS17"
round = 1

# All relative paths below are resolved against the reports directory.
setwd("D:/libraries/SeminarMatching/semapps/shared/reports")
# Directory containing the SQLite database, relative to the working directory.
db.dir = file.path("..", "db")

# Read every table the report needs from the seminar database, restricted
# to the current semester.
semdb = dbConnect(dbname=paste0(db.dir,"/semDB.sqlite"), drv = SQLite())
matchings = dbGet(semdb,"matchings",nlist(semester))
seminars = dbGet(semdb,"seminars",nlist(semester))
students = dbGet(semdb,"students",nlist(semester))
studpref = dbGet(semdb,"studpref", nlist(semester))
manual = dbGet(semdb,"manual",nlist(semester))

# The connection is not used again in this report, so close it here instead
# of leaking it until the R session ends.
# NOTE(review): assumes dbGet() returns fully materialised data frames.
invisible(dbDisconnect(semdb))

# Split the matchings by round (presumably `round` resolves to the column of
# `matchings` inside filter(), not to the global `round` variable).
matchings1 = filter(matchings, round==1)

matchings2 = filter(matchings, round==2) %>%
  filter(num_ranked>0)

# The analysis below is based on the first matching round.
matchings = matchings1

# Keep only students who ranked at least one seminar.
df = matchings %>% filter(num_ranked>0)

# semid == -1 is the placeholder for "no seminar assigned".
df = mutate(df, found=ifelse(semid!=-1, "found seminar", "no seminar"))

# Attach seminar attributes. Only columns not already present in df are
# joined (plus the key), so no duplicated columns are created.
cols = c("semid",setdiff(colnames(seminars),colnames(df)))
df = left_join(df, seminars[,cols,drop=FALSE], by="semid")

# Attach student attributes in the same way.
cols = c("userid",setdiff(colnames(students),colnames(df)))
df = left_join(df, students[,cols,drop=FALSE], by="userid")

# Headline numbers.
num.studs = length(unique(df$userid))
# Do not count the -1 "no seminar" placeholder as a seminar.
num.sems = length(setdiff(unique(df$semid), -1))
num.match = sum(df$semid != -1)
num.empty = sum(df$semid == -1)
# Ignore missing slot counts, consistent with the slot tables of the report.
total.slots = sum(seminars$slots, na.rm=TRUE)

# Number of students matched to each seminar in round 1 ...
r1fi = df %>% 
  filter(semid>-1) %>%
  group_by(semid) %>%
  summarize(filled_round1=n())

# ... stored as column filled_round1 of the seminars table (NA for seminars
# that received no student).
seminars = left_join(seminars,r1fi, by="semid")

# ---- Round 2 statistics ----
# Preference lists submitted in round 1 and in round 2.
sp1 = studpref %>% filter(round==1)
sp2 = studpref %>% filter(round==2)

# Students that were assigned a seminar in round 1.
found1 = unique(filter(df, round==1, semid > -1)$userid)

# Number of students who submitted preferences in round 2 ...
r2_num = length(unique(sp2$userid))

# ... how many of them did not submit preferences in round 1 ...
r2_new = length(setdiff(sp2$userid, sp1$userid))
# ... and how many of them already obtained a seminar in round 1.
r2_extra = length(intersect(sp2$userid, found1))

# ---- Manual changes ----
# Disabled: net additions per student.
#man = manual %>% 
#  group_by(userid) %>%
#  summarize(net_added = # sum(added==1)-sum(added==0)) %>%
#  filter(net_added != 0)

# Rows of `manual` with added == 1 and with added == 0, respectively.
n_man_added = sum(manual$added==1)
n_man_removed = sum(manual$added==0)

Matching Results for `r semester`, round `r round`

Filled and open seminar slots after matching round 1:

# Per seminar type (semBAMA): number of seminars, offered slots, slots filled
# in round 1 and slots still open. Missing values are ignored in the sums.
seminars %>%
  group_by(semBAMA) %>%
  summarise(
    num_sems = n(),
    total_slots = sum(slots, na.rm = TRUE),
    filled = sum(filled_round1, na.rm = TRUE),
    open = total_slots - filled
  )

The following table shows, for Bachelor and Master students, how many students did or did not find a seminar.

# Count students per degree (studBAMA) and matching outcome (found), with one
# column per outcome.
sdf = df %>%
  group_by(studBAMA,found) %>% 
  summarise(n=n()) %>%
  ungroup() %>%
  spread(key = found, value=n) %>%
  replace.na(0) %>%
  as.data.frame()

# If every student has the same outcome, spread() yields only one outcome
# column. Add the missing one under its proper label and fix the column
# order, so the table always has the same layout.
outcome.cols = c("found seminar", "no seminar")
for (col in setdiff(outcome.cols, colnames(sdf))) {
  sdf[[col]] = rep(0, nrow(sdf))
}
sdf = sdf[, c("studBAMA", outcome.cols), drop=FALSE]

# Address the outcome columns by name rather than by position.
sdf$total = sdf[["found seminar"]] + sdf[["no seminar"]]
sdf

Here are the shares of students that got the seminar they ranked at the n-th position...

# Step 1: for every degree, ranked position and outcome, the share of that
# degree's students (N = students per degree) as a percentage string.
shares.by.pos = df %>%
  filter(num_ranked >0) %>%
  group_by(studBAMA) %>%
  mutate(N = n()) %>%
  group_by(studBAMA, pos, found) %>%
  summarise(share = paste0(round(100*n() / N[1], 1), "%")) %>%
  ungroup()

# Step 2: keep the students that found a seminar and show one column per
# degree; combinations that do not occur are displayed as "0%".
shares.by.pos %>%
  filter(found == "found seminar") %>%
  dcast(pos ~ studBAMA+found, value.var="share") %>%
  mutate(pos = paste0("Ranked ", pos)) %>%
  replace.na("0%")

And now we show how the length of a student's preference list relates to the share of students that were assigned a seminar:

# Step 1: within each degree and preference-list length (N = students in that
# cell), the share of students per outcome as a percentage string.
shares.by.length = df %>%
  group_by(studBAMA, num_ranked) %>%
  mutate(N = n()) %>%
  group_by(studBAMA, num_ranked, found) %>%
  summarise(share = paste0(round(100*n() / N[1], 1), "%"))

# Step 2: one row per list length, one column per degree/outcome pair;
# combinations that do not occur are displayed as "0%".
shares.by.length %>%
  dcast(num_ranked ~ studBAMA + found, value.var="share") %>%
  replace.na("0%")

Here is the number of students that ranked num_ranked seminars:

# Number of students per preference-list length, one column per degree.
num.ranked.tab = df %>% 
  group_by(studBAMA,num_ranked) %>% 
  summarise(number_students=n()) %>%
  dcast(num_ranked ~ studBAMA,value.var="number_students") %>%
  replace.na(0)

# dcast() only creates columns for degrees that occur in the data. Add a zero
# column for an absent degree so the total below does not fail.
for (degree in setdiff(c("Bachelor", "Master"), colnames(num.ranked.tab))) {
  num.ranked.tab[[degree]] = rep(0, nrow(num.ranked.tab))
}

num.ranked.tab %>%
  mutate("Total Number of Students" = Bachelor + Master)

Let us now compare for Bachelor students the seminar matching by their main subject:

# Bachelor students only: per main subject, the mean number of ranked
# seminars and the number of students per outcome.
bachelor.counts = df %>%
  filter(studBAMA=="Bachelor") %>%
  group_by(studSubject) %>%
  mutate(mean_num_sem_ranked = round(mean(num_ranked), 1)) %>%
  group_by(studSubject, mean_num_sem_ranked, found) %>%
  summarise(n = n())

# One column per outcome, sorted by the number of students that found a
# seminar (largest first); missing combinations are shown as 0.
bachelor.counts %>%
  dcast(studSubject + mean_num_sem_ranked ~ found, value.var="n") %>%
  arrange(desc(`found seminar`)) %>%
  replace.na(0)

and the same thing for master students...

# Master students only: per main subject, the mean number of ranked seminars
# and the number of students per outcome.
master.counts = df %>%
  filter(studBAMA=="Master") %>%
  group_by(studSubject) %>%
  mutate(mean_num_sem_ranked = round(mean(num_ranked), 1)) %>%
  group_by(studSubject, mean_num_sem_ranked, found) %>%
  summarise(n = n())

# One column per outcome, sorted by the number of students that found a
# seminar (largest first); missing combinations are shown as 0.
master.counts %>%
  dcast(studSubject + mean_num_sem_ranked ~ found, value.var="n") %>%
  arrange(desc(`found seminar`)) %>%
  replace.na(0)

Filled slots after all matching rounds and manual assignments

# Per seminar type (semBAMA): number of seminars, offered slots, filled slots
# and open slots. Missing values are ignored, as in the round-1 slot table;
# otherwise a single NA would turn the whole total into NA.
seminars %>%
  group_by(semBAMA) %>%
  summarize(
    num_sems=n(),
    total_slots=sum(slots, na.rm=TRUE),
    filled=sum(filled_slots, na.rm=TRUE),
    open=total_slots-filled
  )

Matching round 2

Manually added or removed students

Background:

- Because the world is not ideal, some students drop out of a seminar after the matching rounds, e.g. because they were dissatisfied with their topic, and are then removed from the seminar list.
- Other students are added, either as replacements for students who dropped out or because slots were expanded due to high demand.
- Note that the numbers below are only correct if all lecturers enter all additions and removals in the database and if no student is added or removed multiple times.



skranz/SeminarMatching documentation built on June 9, 2020, 6:57 p.m.