# R/reformM.r In harrelfe/Hmisc: Harrell Miscellaneous

#### Documented in reformM

```# Function to automatically reorder variables for use with aregImpute
# Modification of function written by Yong Hao Pua
# Specify nperm=integer to obtain that many formulae with random
# permutation of order of variables; omit nperm to obtain formula
# with variables sorted by decreasing number of NAs

# reformM(formula, data, nperm)
#
# Arguments:
#   formula  a formula; only its right-hand-side term labels appear in the
#            result, so wrappers such as I() are carried through unchanged
#   data     a data frame containing every variable named in `formula`
#   nperm    optional number of randomly permuted formulae to generate
#
# Side effect:
#   prints a recommended number of imputations: the larger of 5 and the
#   percentage (rounded up) of observations having a missing value in any
#   variable named in `formula`.  Empty character strings count as missing.
#
# Value:
#   nperm omitted : a one-sided formula whose terms are sorted by decreasing
#                   number of observations missing any variable of the term
#   nperm == 1    : a single one-sided formula with randomly ordered terms
#   otherwise     : a list of nperm such formulae
reformM <- function(formula, data, nperm) {

  tlabels <- attr(terms(formula), "term.labels")
  cs <- all.vars(formula)
  # drop = FALSE keeps a data frame even when the formula names one variable
  data <- data[, cs, drop = FALSE]
  ismiss <- function(x) if (is.character(x)) is.na(x) | x == '' else is.na(x)

  # Build the observations x variables indicator matrix explicitly so that
  # one-row or one-column data cannot collapse it to a plain vector
  m <- matrix(as.logical(unlist(lapply(data, ismiss), use.names = FALSE)),
              nrow = nrow(data), ncol = length(cs),
              dimnames = list(NULL, cs))
  miss.per.obs <- rowSums(m)
  n.obs <- length(miss.per.obs)

  # Percent of observations with any missings (0 when there are no rows)
  pm <- if (n.obs > 0) ceiling(100 * sum(miss.per.obs > 0) / n.obs) else 0
  nimpute <- max(5, pm)
  cat("Recommended number of imputations:", nimpute, "\n")

  if (missing(nperm)) {
    # Count missings per term rather than per variable: term labels and
    # all.vars() need not line up (response variable, interactions, terms
    # involving several variables), so indexing one by the other is unsafe.
    miss.per.term <- vapply(tlabels, function(lab) {
      vars <- all.vars(parse(text = lab, keep.source = FALSE))
      sum(rowSums(m[, vars, drop = FALSE]) > 0)
    }, numeric(1))
    j <- order(miss.per.term, decreasing = TRUE)  # most missings first
    formula <- as.formula(paste('~', paste(tlabels[j], collapse = ' + ')))
  } else {
    formula <- vector("list", nperm)
    prev <- character(0)
    for (i in seq_len(nperm)) {
      # Try up to 10 times to get a permutation not generated before.  If
      # none is found (e.g. nperm exceeds the number of distinct orderings)
      # the last draw is kept, so the result always has nperm formulae and
      # never contains NULL gaps.
      for (attempt in seq_len(10)) {
        f <- paste('~', paste(sample(tlabels), collapse = ' + '))
        if (!(f %in% prev)) break
      }
      prev <- c(prev, f)
      formula[[i]] <- as.formula(f)
    }

    if (nperm == 1) formula <- formula[[1]]
  }
  formula
}
```
harrelfe/Hmisc documentation built on May 19, 2024, 4:13 a.m.