#' @title cut3
#'
#' @description Hmisc::cut2 bones repackaged to remove errors with importing Hmisc
#'
#' @param x numeric vector to classify into intervals.
#' @param cuts cut points.
#' @param m desired minimum number of observations in a group. The algorithm does not guarantee that all groups will have at least m observations.
#' @param g number of quantile groups
#' @param digits number of significant digits to use in constructing levels.
#' @param minmax if cuts is specified but min(x)<min(cuts) or max(x)>max(cuts), augments cuts to include min and max x
#' @param oneval if an interval contains only one unique value, the interval will be labeled with the formatted version of that value instead of the interval endpoints, unless oneval=FALSE
#' @param onlycuts set to TRUE to only return the vector of computed cuts. This consists of the interior values plus outer ranges.
#' @param formatfun format function
#' @param ... additional arguments passed to formatfun
#'
#' @return vector of cut
#'
#'
#' @export
#' @importFrom stats "approx"
cut3 <- function(x, cuts, m=150, g, digits, minmax=TRUE,
oneval=TRUE, onlycuts=FALSE, formatfun = format, ...){
method <- 1
formatfun <- format
x.unique <- base::sort(base::unique(c(x[!is.na(x)],if(!missing(cuts))cuts)))
min.dif <- base::min(diff(x.unique))/2
min.dif.factor <- 1
## Make formatted values look good
if(missing(digits))
digits <- 5
## add digits to formatfun's arguments if relevant
format.args <-
if (any(c("...","digits") %in% names(formals(args(formatfun))))) {
c(digits = digits, list(...))
} else {
list(...)
}
oldopt <- options('digits')
options(digits=digits)
on.exit(options(oldopt))
xlab <- attr(x, 'label')
if(missing(cuts)) {
nnm <- sum(!is.na(x))
if(missing(g)) g <- max(1,floor(nnm/m))
if(g < 1)
stop('g must be >=1, m must be positive')
options(digits=15)
n <- table(x)
xx <- as.double(names(n))
options(digits=digits)
cum <- cumsum(n)
m <- length(xx)
y <- as.integer(ifelse(is.na(x),NA,1))
labs <- character(g)
cuts <- approx(cum, xx, xout=(1:g)*nnm/g,
method='constant', rule=2, f=1)$y
cuts[length(cuts)] <- max(xx)
lower <- xx[1]
upper <- 1e45
up <- low <- double(g)
i <- 0
for(j in 1:g) {
cj <- if(method==1 || j==1) cuts[j] else {
if(i==0)
stop('program logic error')
s <- if(is.na(lower)) FALSE else xx >= lower
cum.used <- if(all(s)) 0 else max(cum[!s])
if(j==m) max(xx) else if(sum(s)<2) max(xx) else
approx(cum[s]-cum.used, xx[s], xout=(nnm-cum.used)/(g-j+1),
method='constant', rule=2, f=1)$y
}
if(cj==upper) next
i <- i + 1
upper <- cj
y[x >= (lower-min.dif.factor*min.dif)] <- i
low[i] <- lower
lower <- if(j==g) upper else min(xx[xx > upper])
if(is.na(lower)) lower <- upper
up[i] <- lower
}
low <- low[1:i]
up <- up[1:i]
variation <- logical(i)
for(ii in 1:i) {
r <- range(x[y==ii], na.rm=TRUE)
variation[ii] <- diff(r) > 0
}
if(onlycuts) return(unique(c(low, max(xx))))
flow <- do.call(formatfun,c(list(low), format.args))
fup <- do.call(formatfun,c(list(up), format.args))
bb <- c(rep(')',i-1),']')
labs <- ifelse(low==up | (oneval & !variation), flow,
paste('[',flow,',',fup,bb,sep=''))
ss <- y==0 & !is.na(y)
if(any(ss))
stop(paste('categorization error in cut2. Values of x not appearing in any interval:\n',
paste(format(x[ss],digits=12),collapse=' '),
'\nLower endpoints:',
paste(format(low,digits=12), collapse=' '),
'\nUpper endpoints:',
paste(format(up,digits=12),collapse=' ')))
y <- structure(y, class='factor', levels=labs)
} else {
if(minmax) {
r <- range(x, na.rm=TRUE)
if(r[1]<cuts[1]) cuts <- c(r[1], cuts)
if(r[2]>max(cuts)) cuts <- c(cuts, r[2])
}
l <- length(cuts)
k2 <- cuts-min.dif
k2[l] <- cuts[l]
y <- base::cut(x, k2)
}
means <- tapply(x, y, function(w)mean(w,na.rm=TRUE))
levels(y) <- do.call(formatfun,c(list(means), format.args))
attr(y,'class') <- "factor"
if(length(xlab)) label(y) <- xlab
y
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.