Nothing
#' Subset gbm.auto input datasets to 2 groups using the partial deviance plots
#'
#' Set your working directory to the output folder of a gbm.auto/gbm.loop run
#' (run with linesfiles = TRUE so that the line csv files exist).
#' This function returns the variable value corresponding to the 0 value on the
#' lineplots, which should be the optimal place to split the dataset into 2
#' subsets, low and high, IF the relationship doesn't cross 0 more than once.
#' The function is similarly useful to quickly get the 0-point value in these
#' cases, i.e. where values below are detrimental and values above beneficial
#' (check the plots though).
#'
#' File names read from the working directory:
#' loop varnames are BinLineLoop_VAR.csv & GausLineLoop_VAR.csv;
#' normal varnames are Bin_Best_line_VAR.csv & Gaus_Best_line_VAR.csv.
#' For loop files only the first column (X) and the third-from-last column
#' (average Y) are used; for normal files the first two columns are used.
#'
#' If any Y value is exactly zero, the X value of the first such row is
#' returned. Otherwise the split is the average of the X values immediately
#' before and after the sign change in Y. A warning is raised when Y never
#' changes sign or changes sign more than once, since the returned value is
#' then not a usable single breakpoint.
#'
#' Variables with no matching csv file are skipped (x names used in gbm may not
#' have generated files).
#'
#' @param x Vector of variable names.
#' @param fams Vector of statistical data distribution family names to be
#'   modelled by gbm; used as the file name prefix.
#' @param loop Is the folder a gbm.loop output?
#'
#' @return A list of breakpoint values, named by variable, which datasets can
#'   be subsetted using. Entries are keyed by variable name only, so if files
#'   exist for the same variable under more than one family, the value from the
#'   later family in \code{fams} replaces the earlier one.
#' @export
#' @importFrom utils read.csv
#' @author Simon Dedman, \email{simondedman@@gmail.com}
#' @examples
#' \donttest{
#' # Not run: requires completed gbm.auto run.
#' # having run gbm.auto (with linesfiles=TRUE), set working directory there
#' data(samples)
#' gbm.subset(x = names(samples[c(4:8, 10)]), fams = c("Bin", "Gaus"))
#' }
#'
gbm.subset <- function(x, # Vector of variable names.
                       fams = c("Bin", "Gaus"), # Vector of family names modelled by gbm.
                       loop = FALSE) { # is the folder a gbm.loop output?
  subsetsplits <- list() # create blank list object
  for (j in fams) { # loop through families
    for (i in x) { # loop through variable names' files
      # Expected csv name for this family/variable combination.
      linefile <- if (loop) {
        paste0(j, "LineLoop_", i, ".csv")
      } else {
        paste0(j, "_Best_line_", i, ".csv")
      }

      # Skip variables without a file. Testing the file directly (rather than
      # testing whether an object called 'tmp' exists) avoids picking up an
      # unrelated 'tmp' from the caller's workspace.
      if (!file.exists(linefile)) {
        next
      }

      tmp <- read.csv(linefile) # read in csv file
      if (loop) {
        tmp <- tmp[, c(1, length(tmp) - 2)] # keep only X & averageY
      }

      ysign <- sign(tmp[, 2]) # -1 / 0 / 1 for each Y value
      zerorow <- match(0, ysign) # first row with Y exactly 0, else NA

      if (!is.na(zerorow)) { # if there's an exact 0 value in the Y column,
        # set the corresponding X value as a list item named i
        subsetsplits[[i]] <- tmp[zerorow, 1]
      } else { # if there isn't (normal)
        # last row(s) before crossing the Y=0 intercept
        row1 <- which(diff(ysign) != 0)
        if (length(row1) == 0L) {
          warning("gbm.subset: no zero crossing found in ", linefile,
                  "; split value for '", i, "' is undefined",
                  call. = FALSE)
        } else if (length(row1) > 1L) {
          warning("gbm.subset: more than one zero crossing found in ",
                  linefile, "; split value for '", i,
                  "' averages all crossings and may not be meaningful",
                  call. = FALSE)
        }
        row2 <- row1 + 1 # first row(s) after
        # set list value i as average of the points before & after the
        # intercept crossing
        subsetsplits[[i]] <- mean(c(tmp[row1, 1], tmp[row2, 1]))
      } # close 'point on 0 line' ifelse
    } # close var.names loop
  } # close fam.names loop
  return(subsetsplits) # return object
} # close function
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.