inst/doc/intro.R

## ---- echo=FALSE--------------------------------------------------------------
library(simputation)

## ----eval=FALSE---------------------------------------------------------------
#  install.packages('simputation')

## ----echo=FALSE---------------------------------------------------------------
knitr::kable(
  data.frame(
      `function` = c("impute_rlm"    ,"impute_en"              , "impute_cart", "impute_rf", "impute_rhd","impute_shd","impute_knn","impute_mf","impute_em")
    , model = c("M-estimation", "ridge/elasticnet/lasso", "CART"       , "random forest","random hot deck","sequential hot deck","k nearest neighbours","missForest","mv-normal")
    , package = c("MASS"      ,"glmnet"                 , "rpart"      , "randomForest","VIM (optional)","VIM (optional)","VIM (optional)","missForest","norm")
    , R.recommended = c("yes","no","yes","no","no","no","no","no","no")
    ,stringsAsFactors=FALSE
  )
)

## ---- eval=FALSE--------------------------------------------------------------
#  impute_<model>(data, formula, [model-specific options])

## ---- eval=FALSE--------------------------------------------------------------
#  IMPUTED ~ MODEL_SPECIFICATION [ | GROUPING ]

## -----------------------------------------------------------------------------
dat <- iris
dat[1:3,1] <- dat[3:7,2] <- dat[8:10,5] <- NA
head(dat,10)

## -----------------------------------------------------------------------------
da1 <- impute_lm(dat, Sepal.Length ~ Sepal.Width + Species)
head(da1,3)

## -----------------------------------------------------------------------------
da2 <- impute_median(da1, Sepal.Length ~ Species)
head(da2,3)

## -----------------------------------------------------------------------------
da3 <- impute_cart(da2, Species ~ .)
head(da3,10)

## ---- eval=FALSE--------------------------------------------------------------
#  da4 <- dat |>
#    impute_lm(Sepal.Length ~ Sepal.Width + Species) |>
#    impute_median(Sepal.Length ~ Species) |>
#    impute_cart(Species ~ .)

## -----------------------------------------------------------------------------
da5 <- impute_rlm(dat, Sepal.Length + Sepal.Width ~ Petal.Length + Species)
head(da5)

## -----------------------------------------------------------------------------
da6 <- impute_lm(dat, . - Species ~ 0 + Species, add_residual = "normal")
head(da6)

## -----------------------------------------------------------------------------
# New data set, leaving Species intact
dat <- iris
dat[1:3,1] <- dat[3:7,2] <- NA

# split dat into groups according to 'Species', impute, combine and return.
da8 <- impute_lm(dat, Sepal.Length ~ Petal.Width | Species)
head(da8)

## ----eval=FALSE---------------------------------------------------------------
#  library(magrittr)
#  library(dplyr)
#  
#  dat <- iris
#  dat[1:3,1] <- dat[3:7,2] <- NA
#  
#  dat |> group_by(Species) |>
#    impute_lm(Sepal.Length ~ Petal.Width)

## -----------------------------------------------------------------------------
dat <- iris
dat[1:3,1] <- dat[3:7,2] <- NA

dat <- impute_proxy(dat, Sepal.Length ~ median(Sepal.Length,na.rm=TRUE)/median(Sepal.Width, na.rm=TRUE) * Sepal.Width | Species)
head(dat)

## -----------------------------------------------------------------------------
m <- lm(Sepal.Length ~ Sepal.Width + Species, data=iris)

## -----------------------------------------------------------------------------
dat <- iris
dat[1:3,1] <- dat[3:7,2] <- NA
head(dat)

dat <- impute(dat, Sepal.Length ~ m)
head(dat)

## ----eval=FALSE---------------------------------------------------------------
#  dat <- data.frame(
#    foo = c(1,2,NA,4)
#    , bar = c(1,NA,8,NA)
#  )
#  # sequential hotdeck imputation, no sorting variables
#  impute_shd(dat, . ~ 1, pool="complete")
#  impute_shd(dat, . ~ 1, pool="univariate")
#  impute_shd(dat, .~1, backend="VIM")

Try the simputation package in your browser

Any scripts or data that you put into this service are public.

simputation documentation built on June 16, 2022, 5:10 p.m.