Nothing
# Discretize all columns selected by `formula`.
#
# Steps, in order:
#   1. Build the working data frame with get.data.frame.from.formula().
#   2. If its first column is not already a factor, bin it with
#      equal.frequency.binning.discretization() into `bins` bins.
#      (NOTE(review): the first column is presumably the formula's response
#      variable -- confirm against get.data.frame.from.formula.)
#   3. Pass the frame to supervised.discretization().
#   4. Run get.data.frame.from.formula() once more on the result.
#
# Args:
#   formula: formula forwarded unchanged to the helper functions.
#   data:    data source forwarded to get.data.frame.from.formula().
#   bins:    number of equal-frequency bins used for a non-factor first
#            column. Defaults to 5, the value that used to be hard-coded.
#
# Returns:
#   The value of get.data.frame.from.formula() applied to the discretized
#   data.
discretize.all <- function(formula, data, bins = 5) {
  frame <- get.data.frame.from.formula(formula, data)

  if (!is.factor(frame[[1]])) {
    frame[[1]] <- equal.frequency.binning.discretization(frame[[1]], bins)
  }

  frame <- supervised.discretization(formula, data = frame)

  # reorder attributes: re-extract the frame through the formula helper
  get.data.frame.from.formula(formula, frame)
}
# unsupervised
# Equal-frequency binning of a numeric vector.
#
# The non-missing values are ranked and split into `bins` consecutive groups
# of (approximately) equal size; each value is replaced by the 1-based index
# of its group. Missing values stay missing.
#
# Args:
#   data: numeric vector to discretize.
#   bins: number of bins; coerced with as.integer() and must be >= 1.
#
# Returns:
#   A factor of the same length as `data` whose levels are the bin indices
#   that were actually assigned. When `bins` exceeds the number of
#   non-missing values, some bin indices are simply unused.
#
# Errors:
#   "Data must be numeric" if `data` is not numeric;
#   "Number of bins too small" if `bins` is below 1.
equal.frequency.binning.discretization <- function(data, bins) {
  bins <- as.integer(bins)
  if (!is.numeric(data)) {
    stop("Data must be numeric")
  }
  if (bins < 1) {
    stop("Number of bins too small")
  }

  # order() puts missing values last, so the first `n_obs` entries of
  # `ranked` index exactly the non-missing observations in ascending order.
  ranked <- order(data)
  n_obs <- sum(complete.cases(data))
  per_bin <- n_obs / bins

  binned <- data
  upper <- 0
  for (i in seq_len(bins)) {
    lower <- upper + 1
    upper <- round(i * per_bin)
    last <- min(upper, n_obs)
    # An empty bin has last < lower. Without this guard `lower:last` would
    # count backwards, relabelling earlier observations and reaching into the
    # trailing NA positions of `ranked`, which turned missing values into
    # bin labels.
    if (last >= lower) {
      binned[ranked[lower:last]] <- i
    }
  }

  factor(binned)
}
# unsupervised
# Equal-width binning of a numeric vector.
#
# Validates the arguments and then delegates to cut(), which splits the
# range of `data` into `bins` intervals.
#
# Args:
#   data: numeric vector to discretize.
#   bins: number of intervals, passed to cut() as its second argument.
#
# Returns:
#   The factor produced by cut(data, bins).
#
# Errors:
#   "Data must be numeric" if `data` is not numeric;
#   "Number of bins too small" if `bins` is below 1.
equal.width.binning.discretization <- function(data, bins) {
  numeric_input <- is.numeric(data)
  if (!numeric_input) {
    stop("Data must be numeric")
  }

  too_few_bins <- bins < 1
  if (too_few_bins) {
    stop("Number of bins too small")
  }

  cut(data, bins)
}
# supervised: MDL-based discretization (Fayyad & Irani)
# Supervised discretization of the columns selected by `formula`.
#
# Builds the working data frame with get.data.frame.from.formula(), drops
# every row whose first column is missing, and hands the remaining rows to
# Discretize() with na.action = na.pass.
# NOTE(review): Discretize() is not defined in this file -- presumably it is
# RWeka's Discretize filter; confirm against the package imports.
#
# Args:
#   formula: formula forwarded to get.data.frame.from.formula() and
#            Discretize().
#   data:    data source forwarded to get.data.frame.from.formula().
#
# Returns:
#   Whatever Discretize() returns for the (possibly row-filtered) frame.
supervised.discretization <- function(formula, data) {
  frame <- get.data.frame.from.formula(formula, data)

  # Keep only rows with a non-missing value in the first column; when
  # nothing is missing the frame is passed on untouched.
  keep <- complete.cases(frame[[1]])
  if (!all(keep)) {
    frame <- frame[keep, , drop = FALSE]
  }

  Discretize(formula, data = frame, na.action = na.pass)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.