R/expertForests.R

#' Create set of expert forests
#'
#' @param x is an object
#' @return y is the output
#' @export

expertForests <- function(y.train, y.test,
                          df.train, df.test, df.testREAL,
                          n.train, n.test, n.testREAL, uniq.crty,
                          num_expForests=3, levels,
                          ntree = 100, mtry = 30, nodesize = 10){

  # Initialise empty result matricies
  matrix_y.hat.train <- matrix(0, nrow = n.train, ncol = num_expForests)
  matrix_y.hat.test <- matrix(0, nrow = n.test, ncol = num_expForests)
  matrix_y.hat.testREAL <- matrix(0, nrow = n.testREAL, ncol = num_expForests)

  # Create variable subsets
  df.train <- as.data.frame(df.train)
  df.train[] <- lapply(df.train, factor, levels = levels)

  df.test <- as.data.frame(df.test)
  df.test[] <- lapply(df.test, factor, levels = levels)

  df.testREAL <- as.data.frame(df.testREAL)
  df.testREAL[] <- lapply(df.testREAL, factor, levels = levels)

  # Define additionally necessary variables
  y.train_0 <- y.train
  df.train_0 <- df.train
  err.ctry <- matrix(99,ncol=26,nrow=1)
  uniq.crty <- unique(y.train)

  for(h in 1:num_expForests){
    # Estimate Rnd Forest
    df <- df.train
    df$y <- factor(y.train, levels = c("ACB","ASW","BEB","CDX", "CEU","CHB","CHS",
                               "CLM","ESN","FIN","GBR","GIH","GWD","IBS",
                               "ITU","JPT","KHV","LWK","MSL","MXL","PEL",
                               "PJL","PUR","STU","TSI","YRI"))
    res.rndForest <- randomForest(y ~ ., data=df,
                                  ntree=ntree, mtry=mtry, nodesize=nodesize)

    # Calculate predicted labels for all datasets
    matrix_y.hat.train[,h] <- as.character(predict(res.rndForest, df.train))
    matrix_y.hat.test[,h] <- as.character(predict(res.rndForest, df.test))
    matrix_y.hat.testREAL[,h] <- as.character(predict(res.rndForest, df.testREAL))

    # Draw weighted sample proportional to misclassification of labels
    y.hat <- as.factor(matrix_y.hat.test[,h])
    err.w.sample2 <- err.w.sample(y.train_0 = y.train_0, y.test = y.test, y.hat = y.hat,
                                  n.train = n.train, n.test = n.test,
                                  err.ctry = err.ctry, uniq.crty = uniq.crty)
    new.train <- err.w.sample2$new.train
    err.ctry <- err.w.sample2$err.ctry
    df.train <- df.train_0[new.train,]
  }

  res.list <- list(matrix_y.hat.train = matrix_y.hat.train,
                   matrix_y.hat.test = matrix_y.hat.test,
                   matrix_y.hat.testREAL = matrix_y.hat.testREAL)
  return(res.list)
}
thomaswiemann/SMLpractical documentation built on May 28, 2019, 12:23 p.m.