# R/random_dataset.R

# Build a rating generator bound to a simulation environment.
#
# Arguments:
#   env        Environment providing `N` (number of documents) plus
#              `reviewerNames` and `reviewerExpectedValues`, which are
#              read as parallel collections (one entry per reviewer).
#   n_missing  How many ratings to blank out per reviewer (default 3).
#              Clamped to the range [0, env$N] so that small datasets
#              do not make the sampling step fail.
#
# Returns:
#   A function(name) producing a numeric vector of length `env$N` with
#   whole-number ratings of at most 10 and `NA` for deleted ratings.
#   It raises an error when `name` is not among `env$reviewerNames`.
ratingVectorForEnv <- function(env, n_missing = 3) {
  function(name) {
    # Take the first match only: reviewer names may repeat, and a
    # multi-element mean would be recycled by rnorm(), mixing the
    # expectations of different reviewers inside one rating vector.
    position <- match(name, env$reviewerNames)
    if (is.na(position)) {
      stop("Unknown reviewer name: ", name, call. = FALSE)
    }
    expected <- env$reviewerExpectedValues[[position]]

    ratings <- abs(rnorm(env$N, mean = expected, sd = 1.5))

    # Randomly delete some ratings to simulate that
    # not all reviewers reviewed every document
    drop_count <- max(0, min(n_missing, env$N))
    ratings[sample.int(env$N, drop_count)] <- NA

    # Round up to whole ratings; anything that lands above the scale
    # is capped at 10 (same result as capping values > 9 beforehand).
    pmin(ceiling(ratings), 10)
  }
}

# Generate a random review dataset for simulations.
#
# Arguments:
#   number_of_documents  Number of documents/applicants (rows), >= 1.
#   number_of_reviewers  Number of reviewers (columns), >= 1.
#
# Returns:
#   An environment with the fields
#     N, NR                   the two sizes passed in,
#     reviewerNames           1 x NR character matrix of unique names,
#     applicantNames          1 x N character matrix ("First Surname"),
#     reviewerExpectedValues  numeric vector of hidden expectations,
#     reviewerInformation     1 x NR matrix holding those expectations,
#     ratings                 N x NR matrix built column by column with
#                             ratingVectorForEnv(env).
randomDataset <- function(number_of_documents, number_of_reviewers) {
  if (number_of_documents < 1 || number_of_reviewers < 1) {
    stop(
      "number_of_documents and number_of_reviewers must both be >= 1",
      call. = FALSE
    )
  }

  env    <- new.env()
  env$N  <- number_of_documents
  env$NR <- number_of_reviewers

  reviewerNamesDictionary <- c(
    "Ricarda", "Ryan", "Rose", "Rahel", "Ruben", "Roy", "Roxy", "Robin",
    "Rick"
  )
  applicantNamesDictionary <- c(
    "Ava", "Abigail", "Andrea", "Andre", "Adele", "April", "Anna",
    "Arthur", "Ashley", "Aisha", "Albert", "Ali", "Aljona", "Amanda",
    "Anna", "Ally"
  )
  applicantSurnamesDictionary <- c(
    "Galois", "Riemann", "Gauß", "Weyl", "Gödel", "Wittgenstein",
    "Weierstraß", "Händel", "Bach", "Mozart", "Goethe", "Schiller",
    "Koch", "Rosenbaum"
  )

  # Reviewer names key both the hidden expectations and the rating
  # columns, so they have to be unique. Draw without replacement while
  # the dictionary is large enough; otherwise repeats are disambiguated
  # by make.unique() ("Ryan", "Ryan.1", ...).
  drawnReviewers <- sample(
    reviewerNamesDictionary, env$NR,
    replace = env$NR > length(reviewerNamesDictionary)
  )
  env$reviewerNames <- t(make.unique(drawnReviewers))

  # Applicant names may repeat; they are only used as row labels.
  surnames   <- sample(applicantSurnamesDictionary, env$N, replace = TRUE)
  firstNames <- sample(applicantNamesDictionary, env$N, replace = TRUE)
  env$applicantNames <- t(paste(firstNames, surnames))

  # Choose "hidden" random expectation values for our reviewers:
  # whole numbers from 3 to 8 (ceiling of a uniform draw on (2, 8)).
  # This simulates the different strictness of the reviewers and
  # should be "revealed" by our model afterwards.
  env$reviewerExpectedValues <- ceiling(runif(env$NR, min = 2, max = 8))
  env$reviewerInformation    <- rbind(env$reviewerExpectedValues)

  reviewerLabels <- as.vector(env$reviewerNames)
  rownames(env$reviewerInformation) <- "Hidden expected value"
  colnames(env$reviewerInformation) <- reviewerLabels

  # One rating vector per reviewer. The result is reshaped explicitly so
  # that a single document still yields a 1 x NR matrix, not a vector.
  ratingValues <- vapply(
    reviewerLabels, ratingVectorForEnv(env), numeric(env$N),
    USE.NAMES = FALSE
  )
  env$ratings <- matrix(
    ratingValues,
    nrow = env$N, ncol = env$NR,
    dimnames = list(as.vector(env$applicantNames), reviewerLabels)
  )
  env
}
# neumanrq/fairreviewers documentation built on May 24, 2019, 5:06 a.m.