Center for Renal Precision Medicine

# Set the knitr default so each chunk's source code is echoed in the output.
knitr::opts_chunk$set(echo = TRUE)

Objectives

Requirements and Konstants

# Random seed: fixed so that the random draws made later in the document are
# reproducible from run to run.
set.seed(44701)

# konstants

# Libraries
# Install randomForest only when it is missing. requireNamespace() tests for
# availability without attaching the package, so the package is attached
# exactly once, by library() below, which stops with an error if the
# installation did not succeed.
if (!requireNamespace("randomForest", quietly = TRUE)) {
  install.packages("randomForest")
}
library(randomForest)

Load data

# Read the partitioned data set, keeping text columns as character vectors.
data <- read.csv(
  "../data/partitioned_withgroups.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
# Remove the patient id, rgroup, and vgroup label columns.
drops <- c("patients", "rawdata.r_group", "rawdata.v_group")
# drop = FALSE keeps `data` a data frame even if only one column survives.
data <- data[, !(names(data) %in% drops), drop = FALSE]

Calculate classification error for balanced partition

# Fit a random forest that predicts the `group` label from every other
# column of `data`.
# NOTE(review): na.action is documented for the formula interface of
# randomForest(); confirm it has any effect with this x/y call.
rf_balanced <- randomForest(
  data[, names(data) != "group", drop = FALSE],
  y = as.factor(data$group),
  na.action = "na.exclude"
)
# Pick the per-class error column by name. Position 6 only coincides with
# "class.error" when there are exactly five groups.
classerr_balanced <- rf_balanced$confusion[, "class.error"]
# Show the per-class errors, then their mean.
classerr_balanced
mean(classerr_balanced)

Calculate classification error distribution for random partitioning

# Build a reference distribution: refit the forest 500 times on shuffled
# group labels and record the mean per-class error of each fit.
# The result vector is preallocated instead of grown with append().
classerr <- numeric(500)
for (i in seq_along(classerr)) {
  # Shuffle group labels
  randgroup <- as.factor(sample(data$group))
  rf <- randomForest(
    data[, names(data) != "group", drop = FALSE],
    y = randgroup,
    na.action = "na.exclude"
  )
  # Select the error column by name; position 6 is only correct with
  # exactly five groups.
  classerr[i] <- mean(rf$confusion[, "class.error"])
}
# One-sample t-test of the shuffled-label errors against the balanced
# partition's mean error. The reference value must be passed as `mu`:
# passed positionally it becomes `y`, a one-observation second sample,
# and t.test() stops with "not enough 'y' observations".
t.test(classerr, mu = mean(classerr_balanced))


dmontemayor/Rcricvol documentation built on Sept. 9, 2021, 9:12 a.m.