Nothing
## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
## ----message=FALSE, warning=FALSE---------------------------------------------
library(ranger)
library(optRF)
SNPdata[1:5,1:5]
## ----echo=FALSE, message=FALSE------------------------------------------------
load("opt_prediction_vignette_initData.Rda")
## ----message=FALSE------------------------------------------------------------
Training = SNPdata[1:200,] # Rows 1 to 200 as training data
Test = SNPdata[201:250,-1] # Rows 201 to 250 as test data, excluding the response column (column 1)
## ----eval=FALSE, message=FALSE------------------------------------------------
# set.seed(123) # Set a seed for reproducibility
# RF_model = ranger(y=Training[,1], x=Training[,-1], write.forest = TRUE)
## ----message=FALSE------------------------------------------------------------
predictions = predict(RF_model, data=Test)$predictions
predicted_Test = data.frame(ID = row.names(Test), predicted_yield = predictions)
head(predicted_Test)
## ----message=FALSE------------------------------------------------------------
predicted_Test = predicted_Test[order(predicted_Test$predicted_yield, decreasing=TRUE),]
selected_individuals = predicted_Test[1:5,1]
selected_individuals
## ----eval=FALSE, message=FALSE------------------------------------------------
# set.seed(321) # Set a different seed for reproducibility
# RF_model_2 = ranger(y=Training[,1], x=Training[,-1], write.forest = TRUE)
# predictions_2 = predict(RF_model_2, data=Test)$predictions
# predicted_Test_2 = data.frame(ID = row.names(Test), predicted_yield = predictions_2)
## ----fig.width=6, fig.height=4.5, fig.align='center'--------------------------
M = merge(predicted_Test, predicted_Test_2, by="ID")
plot(M$predicted_yield.x, M$predicted_yield.y,
xlab="Predicted yield in the first run", ylab="Predicted yield in the second run")
cor(M$predicted_yield.x, M$predicted_yield.y)
## ----message=FALSE------------------------------------------------------------
predicted_Test_2 = predicted_Test_2[order(predicted_Test_2$predicted_yield, decreasing=TRUE),]
selected_individuals_2 = predicted_Test_2[1:5,1]
## ----message=FALSE------------------------------------------------------------
selected_individuals
selected_individuals_2
## ----eval=FALSE, message=FALSE------------------------------------------------
# num.trees_values = c(500, 1000, 1500, 2000, 2500, 3000)
# result = data.frame()
# for(i in num.trees_values){
#
# start.time = Sys.time()
#
# set.seed(123)
# RF_model_1 = ranger(y=Training[,1], x=Training[,-1], write.forest = TRUE, num.trees=i)
# predictions_1 = predict(RF_model_1, data=Test)$predictions
# predicted_Test_1 = data.frame(ID = row.names(Test), predicted_yield = predictions_1)
# predicted_Test_1 = predicted_Test_1[order(predicted_Test_1$predicted_yield, decreasing=TRUE),]
# selected_individuals_1 = predicted_Test_1[1:5,1]
#
# set.seed(321)
# RF_model_2 = ranger(y=Training[,1], x=Training[,-1], write.forest = TRUE, num.trees=i)
# predictions_2 = predict(RF_model_2, data=Test)$predictions
# predicted_Test_2 = data.frame(ID = row.names(Test), predicted_yield = predictions_2)
# predicted_Test_2 = predicted_Test_2[order(predicted_Test_2$predicted_yield, decreasing=TRUE),]
# selected_individuals_2 = predicted_Test_2[1:5,1]
#
# end.time = Sys.time()
#
# M = merge(predicted_Test_1, predicted_Test_2, by="ID")
# result = rbind(result, data.frame(number_of_trees = i,
# prediction_stability = cor(M$predicted_yield.x, M$predicted_yield.y),
# selection_stability = sum(selected_individuals_1 %in% selected_individuals_2)/5,
# computation_time = end.time - start.time))
# }
## ----message=FALSE------------------------------------------------------------
result
## ----fig.width=8, fig.height=4.5, fig.align='center'--------------------------
par(mfrow=c(1,2))
plot(prediction_stability ~ number_of_trees, data=result)
plot(computation_time ~ number_of_trees, data=result)
abline(lm(result$computation_time ~ result$number_of_trees), lwd=2, col="grey")
## ----echo=FALSE, message=FALSE------------------------------------------------
load("opt_prediction_vignette_optData.Rda")
## ----eval=FALSE, message=FALSE------------------------------------------------
# set.seed(123) # Set a seed for reproducibility
# optRF_result = opt_prediction(y=Training[,1], X=Training[,-1], X_Test=Test,
# alpha=0.1)
## ----message=FALSE------------------------------------------------------------
summary(optRF_result)
## ----eval=FALSE---------------------------------------------------------------
# set.seed(123)
# RF_model_1 = ranger(y=Training[,1], x=Training[,-1],
# write.forest = TRUE, num.trees=19000)
# predictions_1 = predict(RF_model_1, data=Test)$predictions
# predicted_Test_1 = data.frame(ID = row.names(Test), predicted_yield = predictions_1)
# predicted_Test_1 = predicted_Test_1[order(predicted_Test_1$predicted_yield, decreasing=TRUE),]
# selected_individuals_1 = predicted_Test_1[1:5,1]
#
# set.seed(321)
# RF_model_2 = ranger(y=Training[,1], x=Training[,-1],
# write.forest = TRUE, num.trees=19000)
# predictions_2 = predict(RF_model_2, data=Test)$predictions
# predicted_Test_2 = data.frame(ID = row.names(Test), predicted_yield = predictions_2)
# predicted_Test_2 = predicted_Test_2[order(predicted_Test_2$predicted_yield, decreasing=TRUE),]
# selected_individuals_2 = predicted_Test_2[1:5,1]
#
# M = merge(predicted_Test_1, predicted_Test_2, by="ID")
## ----fig.width=6, fig.height=4.5, fig.align='center'--------------------------
plot(M$predicted_yield.x, M$predicted_yield.y,
xlab="Predicted yield in the first run", ylab="Predicted yield in the second run")
cor(M$predicted_yield.x, M$predicted_yield.y)
## ----message=FALSE------------------------------------------------------------
selected_individuals_1
selected_individuals_2
## ----eval=FALSE, message=FALSE------------------------------------------------
# set.seed(123) # Set a seed for reproducibility
# optRF_result_2 = opt_prediction(y=Training[,1], X=Training[,-1], X_Test=Test,
# alpha=0.1, recommendation="selection")
## ----message=FALSE------------------------------------------------------------
summary(optRF_result_2)
## ----fig.width=6, fig.height=4.5, fig.align='center'--------------------------
plot_stability(optRF_result_2, measure="selection", from=0, to=200000)
## ----message=FALSE------------------------------------------------------------
estimate_numtrees(optRF_result_2, measure="selection", for_stability=0.9)
estimate_numtrees(optRF_result_2, measure="selection", for_stability=0.95)
estimate_numtrees(optRF_result_2, measure="selection", for_stability=0.99)
## ----message=FALSE------------------------------------------------------------
estimate_stability(optRF_result_2, with_num.trees=250000)
## ----echo=FALSE, message=FALSE------------------------------------------------
load("opt_prediction_vignette_stabilityData.Rda")
## ----eval=FALSE, message=FALSE------------------------------------------------
# set.seed(123)
# stability_pred_500 = measure_stability(y = Training[,1], X=Training[,-1], X_Test=Test, num.trees=500, method="prediction")
## ----message=FALSE------------------------------------------------------------
stability_pred_500
## ----eval=FALSE, message=FALSE------------------------------------------------
# set.seed(123)
# stability_pred_5000 = measure_stability(y = Training[,1], X=Training[,-1], X_Test=Test, num.trees=5000, method="prediction")
## ----message=FALSE------------------------------------------------------------
stability_pred_5000
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.