# inst/doc/usingMiceRanger.R

# Document-level parameters. The EVAL entry is read further down to decide
# whether knitr evaluates the chunks.
params <- list(EVAL = TRUE)

## ---- SETTINGS-knitr, include=FALSE-------------------------------------------
# knitr supplies opts_chunk; halt right away if it cannot be attached.
stopifnot(require(knitr))
# Chunk evaluation is switched off when no `params` object exists; otherwise
# the EVAL entry of `params` decides.
opts_chunk$set(
  eval = if (!isTRUE(exists("params"))) FALSE else params$EVAL
)

## ----amputeData,message=FALSE-------------------------------------------------
# library() stops with an error when miceRanger is not installed; require()
# would only warn and return FALSE, deferring the failure to amputeData().
library(miceRanger)

# Fix the RNG state before the data is amputed.
set.seed(1)

# Load data
data(iris)

# Ampute the data. iris contains no missing values by default.
ampIris <- amputeData(iris, perc = 0.25)

# Preview the first 10 rows of the amputed data.
head(ampIris, 10)

## ----simpleMice,message=FALSE-------------------------------------------------
# Run mice sequentially with m = 6 datasets and time the call.
# The assignment sits inside system.time(), whose argument is evaluated in the
# calling environment, so miceObj is available to the chunks that follow.
seqTime <- system.time(
  miceObj <- miceRanger(
    ampIris,
    m = 6,
    returnModels = TRUE,
    verbose = FALSE
  )
)

## ----parMice,message=FALSE----------------------------------------------------
library(doParallel)

# Set up back ends.
cl <- makeCluster(2)

# Perform mice in parallel and time it.
# The finally clause stops the cluster and re-registers the sequential backend
# even when registration or miceRanger() fails, so a failed run does not leave
# the worker processes behind. The value of the braced expression (the
# system.time() result) is what tryCatch() returns on success; miceObjPar is
# still assigned in the calling environment.
parTime <- tryCatch(
  {
    registerDoParallel(cl)
    system.time(
      miceObjPar <- miceRanger(
        ampIris,
        m = 6,
        parallel = TRUE,
        verbose = FALSE
      )
    )
  },
  finally = {
    stopCluster(cl)
    registerDoSEQ()
  }
)

## ----parFaster----------------------------------------------------------------
# Relative reduction in elapsed time (third element of the system.time()
# results) of the parallel run versus the sequential run, as a percentage.
perc <- round(1 - parTime[[3]] / seqTime[[3]], 2) * 100

# A negative value means the parallel run took longer; report it as
# "N% slower" instead of the confusing "-N% faster". isTRUE() keeps the
# comparison safe when perc is NaN (e.g. a zero sequential time).
# Output for non-negative values is unchanged.
print(
  paste0(
    "The parallel process ran ",
    abs(perc),
    if (isTRUE(perc < 0)) "% slower" else "% faster",
    " using 2 R back ends."
  )
)

## ----addToMice----------------------------------------------------------------
# Extend the existing mice object in place: first request 2 more iterations,
# then 1 more dataset. Each call's result overwrites miceObj.
miceObj <- addIterations(
  miceObj,
  iters = 2,
  verbose = FALSE
)
miceObj <- addDatasets(
  miceObj,
  datasets = 1,
  verbose = FALSE
)

## ----customSetup--------------------------------------------------------------
# Named list handed to miceRanger as `vars`: one entry per variable, each
# holding a character vector of other column names.
# NOTE(review): presumably these are the predictors used for each variable -
# confirm against the miceRanger documentation.
v <- list(
  Sepal.Width = c("Sepal.Length", "Petal.Width", "Species"),
  Sepal.Length = c("Sepal.Width", "Petal.Width"),
  Species = "Sepal.Width"
)

# Named character vector handed to miceRanger as `valueSelector`.
pmm <- c(
  Sepal.Width = "meanMatch",
  Sepal.Length = "value",
  Species = "meanMatch"
)

# Named numeric vector handed to miceRanger as `meanMatchCandidates`; only the
# two variables set to "meanMatch" above have an entry.
mmc <- c(
  Sepal.Width = 4,
  Species = 10
)

# Run mice on the amputed data with the custom settings defined above.
miceObjCustom <- miceRanger(
  ampIris,
  vars = v,
  valueSelector = pmm,
  meanMatchCandidates = mmc,
  verbose = FALSE
)

## -----------------------------------------------------------------------------
# Ampute iris again, this time with amputeData's default settings, and impute
# the result using the previously built miceObj.
newDat <- amputeData(iris)
newImputed <- impute(
  newDat,
  miceObj,
  verbose = FALSE
)

## ----completeData-------------------------------------------------------------
# Pull the completed datasets out of miceObj, then preview the first 10 rows
# of the first one.
dataList <- completeData(miceObj)
head(dataList[[1]], n = 10)

## ----impAccuracy,message=FALSE,echo=FALSE,fig.height = 6,warning=FALSE--------
# library() fails immediately when a plotting package is missing; require()
# would only warn and let the chunk die later with a less helpful error.
library(ggplot2)
library(ggpubr)

# Numeric columns of iris to compare.
plotVars <- c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")

# Build one scatter plot per variable: original iris value against the value
# found in the first completed dataset, restricted to the rows where the
# variable is NA in ampIris. Rows of iris, ampIris and dataList[[1]] are
# matched by position.
plotList <- lapply(
  plotVars,
  function(x) {
    # Rows in which variable x is missing in the amputed data. `[[` works for
    # both data.frame and data.table inputs, unlike `[, get(x)]`.
    missIndx <- is.na(ampIris[[x]])

    # data.table() is namespace-qualified because this script never attaches
    # the data.table package itself.
    impVsAmp <- data.table::data.table(
      originalData = iris[[x]][missIndx],
      imputedData = dataList[[1]][[x]][missIndx],
      Species = iris$Species[missIndx]
    )

    # Correlation label position: left edge of the x range, and 90% of the way
    # from the smallest to the largest imputed value on the y axis.
    labelX <- min(impVsAmp$originalData)
    labelY <- max(impVsAmp$imputedData) * 0.9 + 0.1 * min(impVsAmp$imputedData)

    # ggscatter draws the regression line with zero-size points; the visible
    # points are added separately so they can be colored by Species.
    ggscatter(impVsAmp, x = "originalData", y = "imputedData", add = "reg.line", size = 0) +
      geom_point(data = impVsAmp, aes(x = originalData, y = imputedData, color = Species)) +
      stat_cor(label.x = labelX, label.y = labelY) +
      xlab(paste0("Original ", x)) +
      ylab(paste0("Imputed ", x))
  }
)

# Combine the per-variable plots into one figure sharing a single legend.
arranged <- ggarrange(
  plotlist = plotList,
  common.legend = TRUE
)

# Add the overall title; left as a top-level expression so the figure is
# printed.
annotate_figure(
  arranged,
  top = text_grob(
    "Original Data Compared to Imputed Value",
    face = "bold",
    size = 14
  )
)

# Try the miceRanger package in your browser

# Any scripts or data that you put into this service are public.

# miceRanger documentation built on Sept. 6, 2021, 5:07 p.m.