####################
# Author: James Hickey
#
# Series of tests to check the functionality that creates strata
#
####################
context("Testing Strata creation:")
test_that("Strata creation function requires GBMData and GBMDist objects", {
# Require Surv to be available
require(survival)
# create some data
set.seed(1)
N <- 3000
X1 <- runif(N)
X2 <- runif(N)
X3 <- factor(sample(letters[1:4],N,replace=T))
mu <- c(-1,0,1,2)[as.numeric(X3)]
f <- 0.5*sin(3*X1 + 5*X2^2 + mu/10)
tt.surv <- rexp(N,exp(f))
tt.cens <- rexp(N,0.5)
delta <- as.numeric(tt.surv <= tt.cens)
tt <- apply(cbind(tt.surv,tt.cens),1,min)
# throw in some missing values
X1[sample(1:N,size=100)] <- NA
X3[sample(1:N,size=300)] <- NA
# random weights if you want to experiment with them
w <- rep(1,N)
offset <- rep(0, N)
Resp <- Surv(tt, delta)
data <- gbm_data(data.frame(X1, X2, X3), Resp, w, offset)
train_p <- training_params(id=c(rep(1, N/4), rep(2, N/4), rep(3, N/4), rep(4, N/4)), num_train = 4,
num_features = 3, bag_fraction = 1, min_num_obs_in_node = 1)
dist <- gbm_dist("CoxPH")
# When not a GBMData object or GBMDist
copy_data <- data
copy_dist <- dist
attr(data, "class") <- "NOTData"
attr(dist, "class") <- "NOTDist"
# Then an error is thrown
expect_error(create_strata(data, train_p, copy_dist))
expect_error(create_strata(copy_data, train_p, dist))
})
test_that("Strata are NA if distribution is not CoxPH", {
# Require Surv to be available
require(survival)
# create some data
set.seed(1)
N <- 3000
X1 <- runif(N)
X2 <- runif(N)
X3 <- factor(sample(letters[1:4],N,replace=T))
mu <- c(-1,0,1,2)[as.numeric(X3)]
f <- 0.5*sin(3*X1 + 5*X2^2 + mu/10)
tt.surv <- rexp(N,exp(f))
tt.cens <- rexp(N,0.5)
delta <- as.numeric(tt.surv <= tt.cens)
tt <- apply(cbind(tt.surv,tt.cens),1,min)
# throw in some missing values
X1[sample(1:N,size=100)] <- NA
X3[sample(1:N,size=300)] <- NA
# random weights if you want to experiment with them
w <- rep(1,N)
offset <- rep(0, N)
Resp <- Surv(tt, delta)
data <- gbm_data(data.frame(X1, X2, X3), Resp, w, offset)
train_p <- training_params(id=c(rep(1, N/4), rep(2, N/4), rep(3, N/4), rep(4, N/4)), num_train = 4,
num_features = 3, bag_fraction = 1, min_num_obs_in_node = 1)
# GIVEN Dist Not COXPH
dist <- gbm_dist("AdaBoost")
# When strata created
dist <- create_strata(data, train_p, dist)
# Then dist object is unchanged
expect_true(is.na(dist$strata))
expect_true(is.na(dist$sorted))
})
test_that("Creating strata fills strata, time_order and sorted fields - CoxPH", {
# Require Surv to be available
require(survival)
# create some data
set.seed(1)
N <- 3000
X1 <- runif(N)
X2 <- runif(N)
X3 <- factor(sample(letters[1:4],N,replace=T))
mu <- c(-1,0,1,2)[as.numeric(X3)]
f <- 0.5*sin(3*X1 + 5*X2^2 + mu/10)
tt.surv <- rexp(N,exp(f))
tt.cens <- rexp(N,0.5)
delta <- as.numeric(tt.surv <= tt.cens)
tt <- apply(cbind(tt.surv,tt.cens),1,min)
# throw in some missing values
X1[sample(1:N,size=100)] <- NA
X3[sample(1:N,size=300)] <- NA
# random weights if you want to experiment with them
w <- rep(1,N)
offset <- rep(0, N/2)
Resp <- Surv(tt, delta)
data <- gbm_data(data.frame(X1, X2, X3), Resp, w, offset)
train_p <- training_params(id=c(rep(1, N/4), rep(2, N/4), rep(3, N/4), rep(4, N/4)), num_train = 4, num_features = 3,
bag_fraction = 1, min_num_obs_in_node = 1)
# GIVEN Dist - COXPH
dist <- gbm_dist("CoxPH")
# When strata created
dist <- create_strata(data, train_p, dist)
# Then dist object is changed
expect_equal(length(dist$strata), N)
expect_equal(length(dist$time_order), N)
expect_equal(nrow(dist$sorted), N)
})
test_that("If response is a matrix with more than 3 columns strata cannot be created - CoxPH", {
# create some data
set.seed(1)
N <- 3000
X1 <- runif(N)
X2 <- runif(N)
X3 <- factor(sample(letters[1:4],N,replace=T))
mu <- c(-1,0,1,2)[as.numeric(X3)]
f <- 0.5*sin(3*X1 + 5*X2^2 + mu/10)
tt.surv <- rexp(N,exp(f))
tt.cens <- rexp(N,0.5)
delta <- as.numeric(tt.surv <= tt.cens)
tt <- apply(cbind(tt.surv,tt.cens),1,min)
# throw in some missing values
X1[sample(1:N,size=100)] <- NA
X3[sample(1:N,size=300)] <- NA
# random weights if you want to experiment with them
w <- rep(1,N)
offset <- rep(0, N)
# GIVEN Dist - COXPH
dist <- gbm_dist("CoxPH")
# When response has too many columns - set to 4
Resp <- data.frame(tt, delta)
Resp <- cbind(cbind(Resp, rnorm(N)), rnorm(N))
data <- gbm_data(data.frame(X1, X2, X3), Resp, w, offset)
train_p <- training_params(id=c(rep(1, N/4), rep(2, N/4), rep(3, N/4), rep(4, N/4)), num_train = 4, num_features = 3,
bag_fraction = 1, min_num_obs_in_node = 1)
# Then error thrown when creating strata
expect_error(create_strata(data, train_p, dist))
})
test_that("If strata field in distribution object is NULL, all data are put in same strata - CoxPH", {
# Require Surv to be available
require(survival)
# create some data
set.seed(1)
N <- 3000
X1 <- runif(N)
X2 <- runif(N)
X3 <- factor(sample(letters[1:4],N,replace=T))
mu <- c(-1,0,1,2)[as.numeric(X3)]
f <- 0.5*sin(3*X1 + 5*X2^2 + mu/10)
tt.surv <- rexp(N,exp(f))
tt.cens <- rexp(N,0.5)
delta <- as.numeric(tt.surv <= tt.cens)
tt <- apply(cbind(tt.surv,tt.cens),1,min)
# throw in some missing values
X1[sample(1:N,size=100)] <- NA
X3[sample(1:N,size=300)] <- NA
# random weights if you want to experiment with them
w <- rep(1,N)
offset <- rep(0, N)
Resp <- Surv(tt, delta)
data <- gbm_data(data.frame(X1, X2, X3), Resp, w, offset)
train_p <- training_params(id=c(rep(1, N/4), rep(2, N/4), rep(3, N/4), rep(4, N/4)), num_train = 4, num_features = 3,
bag_fraction = 1, min_num_obs_in_node = 1)
# GIVEN Dist - COXPH
dist <- gbm_dist("CoxPH")
# When creating strata with strata initialized to NULL
dist$strata <- NULL
dist <- create_strata(data, train_p, dist)
# Then all examples put in same strata
expect_equal(dist$strata[1], N)
})
test_that("The training responses are sorted according to strata and this order is stored in time_order", {
# Require Surv to be available
require(survival)
# create some data
set.seed(1)
N <- 3000
X1 <- runif(N)
X2 <- runif(N)
X3 <- factor(sample(letters[1:4],N,replace=T))
mu <- c(-1,0,1,2)[as.numeric(X3)]
f <- 0.5*sin(3*X1 + 5*X2^2 + mu/10)
tt.surv <- rexp(N,exp(f))
tt.cens <- rexp(N,0.5)
delta <- as.numeric(tt.surv <= tt.cens)
tt <- apply(cbind(tt.surv,tt.cens),1,min)
# throw in some missing values
X1[sample(1:N,size=100)] <- NA
X3[sample(1:N,size=300)] <- NA
# random weights if you want to experiment with them
w <- rep(1,N)
offset <- rep(0, N)
Resp <- Surv(tt, delta)
data <- gbm_data(data.frame(X1, X2, X3), Resp, w, offset)
train_p <- training_params(id=c(rep(1, N/5), rep(2, N/5), rep(3, N/5), rep(4, N/5)), num_train = 4, num_features = 3,
bag_fraction = 1, min_num_obs_in_node = 1)
# GIVEN Dist - COXPH
dist <- gbm_dist("CoxPH")
# When strata are created
dist <- create_strata(data, train_p, dist)
# Then the training responses are sorted according to strata
expect_equal(order(-data$y[seq_len(4*N/5), 1]), dist$time_order[seq_len(4*N/5)])
expect_equal(order(-data$y[(4*N/5 + 1):N, 1])+(4*N/5), dist$time_order[(4*N/5 + 1):N])
})
test_that("Strata not NULL then observations put in different strata - CoxPH", {
# Require Surv to be available
require(survival)
# create some data
set.seed(1)
N <- 3000
X1 <- runif(N)
X2 <- runif(N)
X3 <- factor(sample(letters[1:4],N,replace=T))
mu <- c(-1,0,1,2)[as.numeric(X3)]
f <- 0.5*sin(3*X1 + 5*X2^2 + mu/10)
tt.surv <- rexp(N,exp(f))
tt.cens <- rexp(N,0.5)
delta <- as.numeric(tt.surv <= tt.cens)
tt <- apply(cbind(tt.surv,tt.cens),1,min)
# throw in some missing values
X1[sample(1:N,size=100)] <- NA
X3[sample(1:N,size=300)] <- NA
# random weights if you want to experiment with them
w <- rep(1,N)
offset <- rep(0, N/2)
Resp <- Surv(tt, delta)
data <- gbm_data(data.frame(X1, X2, X3), Resp, w, offset)
train_p <- training_params(id=c(rep(1, N/4), rep(2, N/4), rep(3, N/4), rep(4, N/4)), num_train = 4, num_features = 3,
bag_fraction = 1, min_num_obs_in_node = 1)
# GIVEN Dist - COXPH
dist <- gbm_dist("CoxPH")
Num_Strata <- 5
strata <- sample(seq_len(Num_Strata), N, replace=TRUE)
dist$original_strata_id <- strata
# When strata are created
dist <- create_strata(data, train_p, dist)
# Then ordered by id
expect_equal(dist$strata[seq_len(Num_Strata)], as.vector(cumsum(table(strata))))
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.