Nothing
#' Distance calculation based on RandomForest Proximity or Depth
#'
#' @param x a data.frame
#' @param y a second data.frame
#' @param rfObject \code{ranger} object
#' @param method distance calculation method, Proximity (Default) or Depth.
#' @param threads number of threads to use
#'
#' @return a \code{dist} or a matrix object with pairwise distance of
#' observations in x vs y (if not null)
#'
#' @examples
#' \donttest{
#' library(ranger)
#' # proximity pairwise distances
#' rf.fit <- ranger(Species ~ ., data = iris, num.trees = 500, write.forest = TRUE)
#' distance_random_forest(x = iris[, -5], rfObject = rf.fit, method = "Proximity", threads = 1)
#'
#' # depth distance for train versus test subset
#' set.seed(1234L)
#' learn <- sample(1:150, 100)
#' test <- (1:150)[-learn]
#' rf.fit <- ranger(Species ~ ., data = iris[learn, ], num.trees = 500, write.forest = TRUE)
#' distance_random_forest(x = iris[learn, -5], y = iris[test, -5], rfObject = rf.fit, method = "Depth")
#' }
#'
#' @export
distance_random_forest <- function(x, y = NULL, rfObject, method = "Proximity", threads = NULL) {
method <- match.arg(method, c("Proximity", "Depth"))
validate_ranger(rfObject)
# set number of threads
if (!is.null(threads)) {
RcppParallel::setThreadOptions(numThreads = threads)
}
# Distance calculation
if (method == "Proximity") {
rf_dist <- proximity_distance(x = x, y = y, rfObject = rfObject)
} else if (method == "Depth") {
rf_dist <- depth_distance(x = x, y = y, rfObject = rfObject)
rf_dist <- rf_dist / rfObject$num.trees
}
rf_dist
}
#' Get proximity matrix of an ranger object
#'
#' @param x a new dataset
#' @param y a second new dataset (Default: NULL)
#' @param rfObject \code{ranger} object
#' @param as_dist Bool, return a dist object.
#'
#' @return a \code{dist} or a matrix object with pairwise proximity of
#' observations in x vs y (if not null)
#'
#' @examples
#' \donttest{
#' library(ranger)
#' rf <- ranger(Species ~ ., data = iris, num.trees = 5, write.forest = TRUE)
#' proximity_distance(x = iris[, -5], rfObject = rf)
#'
#' set.seed(1234L)
#' learn <- sample(1:150, 100)
#' test <- (1:150)[-learn]
#' rf <- ranger(Species ~ ., data = iris[learn, ], num.trees = 500, write.forest = TRUE)
#' proximity_distance(x = iris[learn, -5], y = iris[test, -5], rfObject = rf)
#' }
#' @export
proximity_distance <- function(x, y = NULL, rfObject, as_dist = TRUE) {
xNodes <- terminal_nodes(as.matrix(x), rfObject)
if (is.null(y)) {
d <- cpp_proximityMatrix(xNodes)
n <- nrow(x)
# convert to dist object
rf_dist <- as_dist_object(d, n, "RFProximity")
} else {
yNodes <- terminal_nodes(as.matrix(y), rfObject)
rf_dist <- cpp_proximityMatrixRangerXY(xNodes, yNodes)
}
# to distance
if (as_dist) {
rf_dist <- sqrt(max(rf_dist) - rf_dist)
}
rf_dist
}
#' @title Depth Distance
#'
#' @description This function returns for each observation the pairwise sum of edges
#' between the corresponding terminal nodes over each tree in the random forest.
#'
#' @param x A data.frame with the same columns as in the training data of the RandomForest model
#' @param y A data.frame with the same columns as in the training data of the RandomForest model
#' @param rfObject \code{ranger} object
#'
#' @examples
#' \donttest{
#' library(ranger)
#' rf <- ranger(Species ~ ., data = iris, num.trees = 5, write.forest = TRUE)
#' depth_distance(x = iris[, -5], rfObject = rf)
#' }
#'
#' @export
depth_distance <- function(x, y = NULL, rfObject) {
xNodes <- terminal_nodes(as.matrix(x), rfObject)
rfTrees <- ranger_forests_to_matrix(rfObject)
if (is.null(y)) {
d <- cpp_depthMatrix(xNodes, rfTrees)
n <- nrow(x)
# convert to dist object
rf_dist <- as_dist_object(d, n, "RFDepth")
} else {
yNodes <- terminal_nodes(as.matrix(y), rfObject)
rf_dist <- cpp_depthMatrixRangerXY(xNodes, yNodes, rfTrees)
}
rf_dist
}
#' @title Number of Edges between Terminal Nodes
#'
#' @description first two columns are terminal node IDs; If an ID pair do not
#' appear in a tree -1 is inserted
#'
#' @param rfObject \code{ranger} object
#'
#' @return a \code{matrix} object with pairwise terminal node edge length
#'
#' @examples
#' \donttest{
#' library(ranger)
#' rf.fit <- ranger(Species ~ ., data = iris, num.trees = 5, write.forest = TRUE)
#' edges_between_terminal_nodes(rf.fit)
#' }
#'
#' @export
edges_between_terminal_nodes <- function(rfObject) {
validate_ranger(rfObject)
rfTrees <- ranger_forests_to_matrix(rfObject)
cpp_TerminalNodeDistance(rfTrees)
}
#' Weighted Distance calculation
#'
#' @param x a new dataset
#' @param y a second new dataset
#' @param weights a vector of weights
#'
#' @return a \code{dist} or \code{matrix} object
#'
#' @examples
#' \donttest{
#' library(ranger)
#' rf <- ranger(Species ~ ., data = iris, num.trees = 5, write.forest = TRUE)
#' terminal_nodes(iris[, -5], rf)
#' }
#'
#' @export
weighted_distance <- function(x, y = NULL, weights = NULL) {
if (is.null(weights)) {
weights <- seq(1, ncol(x))
}
if (is.null(y)) {
d <- cpp_weightedDistance(x, weights)
return(as_dist_object(d, nrow(x), "weightedDistance"))
} else {
return(cpp_weightedDistanceXY(x, y, weights))
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.