# Quick look at the built-in iris data: print it, summarise every column,
# and report its dimensions.
iris
summary(iris)
dim(iris)

library(RepDataPeerAssessment1)
library(rprojroot)

# Resolve the project's folders relative to the package root so the script
# does not depend on the current working directory.
project.data <- find_package_root_file("data")
project.extdata <- find_package_root_file("inst", "extdata")
project.R <- find_package_root_file("R")

# Echo the resolved paths.
project.data
project.R
project.extdata
Source: https://www.r-bloggers.com/classification-trees/
# Read the raw E. coli file from the project's extdata folder using
# read.table() defaults, then print it.
ecoli <- read.table(file.path(project.extdata, "ecoli.data"))
ecoli

# Give the nine columns descriptive names.
# NOTE(review): "mcv" may be a typo for "mcg" (the name used in the UCI
# documentation of this dataset) -- confirm before renaming, since other
# code may already refer to `mcv`.
names(ecoli) <- c(
  "Sequence", "mcv", "gvh", "lip", "chg", "aac", "alm1", "alm2", "class"
)
ecoli

# Store the named data frame next to the raw file as an .rda file.
save(ecoli, file = file.path(project.extdata, "ecoli.rda"))

# Number of observations per class.
xtabs(~ class, data = ecoli)
iris dataset
Introducing Missing Completely at Random (MCAR) values using the uniform distribution.
# Knock out a random subset of Sepal.Length values: each observation gets
# an independent uniform(0, 1) draw and is set to NA when that draw falls
# below `prop.m`.
set.seed(971)

y <- iris$Sepal.Length
ni <- length(y)   # number of observations (150 for the built-in iris data)
nj <- 1           # number of variables receiving missing values
prop.m <- 0.05    # probability that any single value is set to NA

mcar <- runif(ni * nj, min = 0, max = 1)
y.mcar <- y
y.mcar[mcar < prop.m] <- NA
y.mcar

# Realised proportion of missing values.
mean(is.na(y.mcar))

# Overwrite the column in the working copy of iris with the version that
# contains the missing values.
iris$Sepal.Length <- y.mcar

mean(is.na(iris$Sepal.Length))
summary(iris)

# Proportion of NAs in every data frame column.
colMeans(is.na(iris))
library(VIM)
library(mice)

# Plot the missing-data summary for iris. `labels` takes the column names
# of the data frame being plotted; the earlier `names(data)` referred to
# the base function `data`, not to a data frame.
aggr_plot <- aggr(
  iris,
  col = c("navyblue", "red"),
  numbers = TRUE,
  sortVars = TRUE,
  labels = names(iris),
  cex.axis = 0.7,
  gap = 3,
  ylab = c("Histogram of missing data", "Pattern")
)

# Missing-data patterns and pairwise observed/missing counts (mice).
md.pattern(iris)
md.pairs(iris)

# Margin plot (VIM) of the first two columns of iris.
marginplot(iris[c(1, 2)])
# Load plotting packages before they are used: stripplot() comes from lattice.
library(ggplot2)
library(lattice)

# Multiple imputation of the missing values with mice's default settings.
imp1 <- mice(iris)
imp1

# Imputed values for Sepal.Length.
imp1$imp$Sepal.Length

# Stack the imputed datasets on top of the original one.
# The `.imp` column holds the dataset number, with `0` being the original.
# With mice's default of 5 imputations there will be 150 * (5 + 1) = 900 rows.
imp_tot2 <- mice::complete(imp1, "long", include = TRUE)
imp_tot2

# Colour observed values blue and imputed values red; the colour vector is
# repeated once per stacked dataset (original plus `imp1$m` imputations).
col <- rep(
  ifelse(is.na(imp1$data$Sepal.Length), "red", "blue"),
  imp1$m + 1
)

# Strip plot of Sepal.Length by imputation number.
stripplot(
  Sepal.Length ~ .imp,
  data = imp_tot2,
  jitter.data = TRUE,
  col = col,
  xlab = "imputation Number"
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.