# Nothing
## ----include = FALSE----------------------------------------------------------
# Chunk defaults used when the vignette is rendered: printed output is
# prefixed with "#>" and `collapse` is switched on.
knitr::opts_chunk$set(comment = "#>", collapse = TRUE)
## ----setup--------------------------------------------------------------------
library(specleanr)
## ----Getting iris and adding some outliers------------------------------------
# Work on a copy of the built-in iris data.
irisdata1 <- iris

# Four extra setosa rows holding extreme values and NAs. The columns are
# created unnamed and then take their names from iris so the rows can be
# appended directly.
rowsOutNA1 <- setNames(
  data.frame(
    c(344, NA, NA, NA),
    c(34, 45, 544, NA),
    c(584, 5, 554, NA),
    c(575, 4554, 474, NA),
    rep("setosa", 4L)
  ),
  names(irisdata1)
)

# iris followed by the injected rows.
dfinal <- rbind(irisdata1, rowsOutNA1)
## ----outlier detection using iris dataset-------------------------------------
# Restrict the augmented data to the setosa rows and keep only the variable
# of interest together with the species label.
setosadf <- dfinal[dfinal$Species %in% "setosa", c("Sepal.Width", "Species")]

# Run multidetect() on Sepal.Width with the listed methods.
setosa_outlier_detection <- multidetect(
  data = setosadf,
  var = "Sepal.Width",
  multiple = FALSE,
  sdm = FALSE,
  na.inform = TRUE,
  missingness = 0.1,
  silence_true_errors = FALSE,
  methods = c(
    "adjbox", "iqr", "hampel", "jknife", "seqfences", "mixediqr",
    "distboxplot", "semiqr", "zscore", "logboxplot", "medianrule"
  )
)
#extractMethods()
## ----ploting2, fig.width = 6, fig.height= 4, fig.align='center'---------------
ggoutliers(setosa_outlier_detection)
## ----clean data extraction and labelling--------------------------------------
# Extract the cleaned setosa records from the detection result (loess = TRUE).
setosa_qc_loess <- extract_clean_data(
  outliers = setosa_outlier_detection,
  refdata = setosadf,
  loess = TRUE
)

# Row count of the cleaned data set ...
nrow(setosa_qc_loess)

# ... and of the reference data it was derived from.
nrow(setosadf)

# Label the reference records using the same detection result.
setosa_qc_labeled <- classify_data(
  outliers = setosa_outlier_detection,
  refdata = setosadf
)
## ----clean data extraction and labelling_multiple, fig.height=4.1, fig.width=4.5, fig.align='center'----
ggenvironmentalspace(
  setosa_qc_labeled,
  type = "1D",
  xlab = "Outlier labels",
  ylab = "Number of records",
  scalecolor = "viridis",
  legend_position = "blank",
  ggxangle = 45,
  xhjust = 1
)
## ----outlier detection using multiple-----------------------------------------
# Same variable and method list as the setosa-only run, but on the full data
# with multiple = TRUE and Species passed as var_col.
multspp_outlier_detection <- multidetect(
  data = dfinal,
  var = "Sepal.Width",
  var_col = "Species",
  multiple = TRUE,
  sdm = FALSE,
  na.inform = TRUE,
  missingness = 0.1,
  silence_true_errors = FALSE,
  methods = c(
    "adjbox", "iqr", "hampel", "jknife", "seqfences", "mixediqr",
    "distboxplot", "semiqr", "zscore", "logboxplot", "medianrule"
  )
)
## ----ploting_single, fig.width = 6, fig.height= 4, fig.align='center'---------
ggoutliers(multspp_outlier_detection)
## ----clean data extraction and labelling mult---------------------------------
# Extract the cleaned records from the multi-species detection result
# (loess = TRUE), with Species passed as var_col.
multsp_qc_loess <- extract_clean_data(
  outliers = multspp_outlier_detection,
  refdata = dfinal,
  var_col = "Species",
  loess = TRUE
)

# Row count of the cleaned data set ...
nrow(multsp_qc_loess)

# ... and of the reference data it was derived from.
nrow(dfinal)

# Label the reference records using the same detection result.
multi_qc_labeled <- classify_data(
  outliers = multspp_outlier_detection,
  refdata = dfinal,
  var_col = "Species"
)
## ----visualise data labelling, fig.height=4.5, fig.width=6.5, fig.align='center'----
ggenvironmentalspace(
  multi_qc_labeled,
  type = "1D",
  xlab = "Outlier labels",
  ylab = "Number of records",
  scalecolor = "viridis",
  legend_position = "blank",
  ggxangle = 45,
  xhjust = 1
)
## ----multiple variables of interest-------------------------------------------
# Run multidetect() on two variables at once, with Species passed as var_col.
multivariables <- multidetect(
  data = dfinal,
  var = c("Sepal.Length", "Sepal.Width"),
  var_col = "Species",
  multiple = TRUE,
  output = "outlier",
  sdm = FALSE,
  warn = FALSE,
  methods = c(
    "zscore", "adjbox", "logboxplot", "distboxplot", "iqr", "semiqr",
    "seqfences", "hampel", "jknife"
  )
)
## ----ploting, fig.width = 6, fig.height= 4, fig.align='center'----------------
ggoutliers(multivariables)
## ----data extraction----------------------------------------------------------
# The two-variable detection result is extracted twice at threshold 0.8:
# first with outlier_to_NA = TRUE, then with outlier_to_NA = FALSE. The row
# count of each result is printed so the two outputs can be compared.
lenwidth_clean <- extract_clean_data(
  dfinal,
  outliers = multivariables,
  var_col = "Species",
  threshold = 0.8,
  outlier_to_NA = TRUE
)
nrow(lenwidth_clean)

lenwidth_long <- extract_clean_data(
  dfinal,
  outliers = multivariables,
  var_col = "Species",
  threshold = 0.8,
  outlier_to_NA = FALSE
)
nrow(lenwidth_long)
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.