# Set the default knitr chunk option echo = TRUE (chunk source code is shown
# in the knitted output alongside its results).
knitr::opts_chunk$set(echo = TRUE)
This is an R Markdown document. It takes a filtered SHARKweb download of national marine environmental data as input and calculates and plots value ranges and outlier thresholds.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document.
Get your IQR (interquartile range) and lower/upper quartiles using:

    lowerq = quantile(data)[2]
    upperq = quantile(data)[4]
    iqr = upperq - lowerq  # or use IQR(data)

Compute the bounds for a mild outlier:

    mild.threshold.upper = (iqr * 1.5) + upperq
    mild.threshold.lower = lowerq - (iqr * 1.5)

Any data point outside these bounds (> mild.threshold.upper or < mild.threshold.lower) is a mild outlier. To detect extreme outliers do the same, but multiply by 3 instead:

    extreme.threshold.upper = (iqr * 3) + upperq
    extreme.threshold.lower = lowerq - (iqr * 3)

Any data point outside these bounds (> extreme.threshold.upper or < extreme.threshold.lower) is an extreme outlier.
Tukey's fences (https://en.wikipedia.org/wiki/Outlier#Tukey.27s_test). Other methods flag observations based on measures such as the interquartile range. For example, if $Q_1$ and $Q_3$ are the lower and upper quartiles respectively, then one could define an outlier to be any observation outside the range:
$$\big[\,Q_1 - k(Q_3 - Q_1),\; Q_3 + k(Q_3 - Q_1)\,\big]$$
for some nonnegative constant $k$. John Tukey proposed this test, where $k = 1.5$ indicates an "outlier", and $k = 3$ indicates data that is "far out".
library(tidyverse)

# NOTE(review): hard-coded, machine-specific working directory; kept so the
# relative file name below keeps resolving as before. Consider a project-
# relative path instead.
setwd("C:/__R/_test_SHARK4R/")

# Read the tab-separated SHARKweb export (latin1 encoded, decimal comma).
readfile = read_delim(
  "sharkweb_data_20201029.txt",
  delim = "\t",
  guess_max = 2000,
  col_names = TRUE,
  locale = readr::locale(encoding = "latin1", decimal_mark = ",")
)

# Quick overview of the columns, parameters and data types in the download.
colnames(readfile)
length(unique(readfile$Parameter))
unique(readfile$Datatyp)

# Re-guess every column type. as.is = TRUE is passed explicitly: leaving it
# unspecified triggers a warning on R >= 4.0, and it keeps strings as
# character. The is.factor pass is kept as a safeguard for any factor columns.
readfile = readfile %>%
  mutate_all(type.convert, as.is = TRUE) %>%
  mutate_if(is.factor, as.character)

unique(readfile$Parameter)

# Rows without a measured value cannot contribute to ranges or quantiles.
readfile = readfile %>%
  drop_na("Mätvärde")
# Bacterioplankton: range, quartiles, Tukey fences and a boxplot for each
# parameter reported under this data type.
datatype = readfile %>%
  filter(Datatyp == "Bacterioplankton") %>%
  select(Parameter, Mätvärde)
message("Bacterioplankton")
for (i in unique(datatype$Parameter)) {
  # Measured values for the current parameter, extracted once and reused.
  values = datatype$Mätvärde[which(datatype$Parameter == i)]
  print(range(values))
  # Default quantile() output: elements 2 and 4 are the 25% and 75% quartiles.
  quartiles = quantile(values)
  print(quartiles)
  lowerq = quartiles[2]
  upperq = quartiles[4]
  iqr = upperq - lowerq
  # Fences at 1.5 * IQR (mild) and 3 * IQR (extreme) beyond the quartiles.
  mild.threshold.upper = (iqr * 1.5) + upperq
  mild.threshold.lower = lowerq - (iqr * 1.5)
  extreme.threshold.upper = (iqr * 3) + upperq
  extreme.threshold.lower = lowerq - (iqr * 3)
  print(data.frame(
    PARAMETER = i,
    mild.threshold.upper,
    mild.threshold.lower,
    extreme.threshold.upper,
    extreme.threshold.lower
  ))
  data_vis = datatype %>%
    filter(Parameter == i) %>%
    select(Mätvärde)
  # Boxplot with the upper fences overlaid: yellow = mild, red = extreme.
  p = ggplot(data_vis, aes(y = Mätvärde, x = i)) +
    geom_boxplot(outlier.colour = "red") +
    geom_hline(yintercept = mild.threshold.upper, colour = "yellow") +
    geom_hline(yintercept = extreme.threshold.upper, colour = "red") +
    ggtitle(label = i) +
    theme_bw()
  print(p)
}
# Chlorophyll: range, quartiles, Tukey fences and a boxplot for each
# parameter reported under this data type.
datatype = readfile %>%
  filter(Datatyp == "Chlorophyll") %>%
  select(Parameter, Mätvärde)
message("Chlorophyll")
for (i in unique(datatype$Parameter)) {
  # Measured values for the current parameter, extracted once and reused.
  values = datatype$Mätvärde[which(datatype$Parameter == i)]
  print(range(values))
  # Default quantile() output: elements 2 and 4 are the 25% and 75% quartiles.
  quartiles = quantile(values)
  print(quartiles)
  lowerq = quartiles[2]
  upperq = quartiles[4]
  iqr = upperq - lowerq
  # Fences at 1.5 * IQR (mild) and 3 * IQR (extreme) beyond the quartiles.
  mild.threshold.upper = (iqr * 1.5) + upperq
  mild.threshold.lower = lowerq - (iqr * 1.5)
  extreme.threshold.upper = (iqr * 3) + upperq
  extreme.threshold.lower = lowerq - (iqr * 3)
  print(data.frame(
    PARAMETER = i,
    mild.threshold.upper,
    mild.threshold.lower,
    extreme.threshold.upper,
    extreme.threshold.lower
  ))
  data_vis = datatype %>%
    filter(Parameter == i) %>%
    select(Mätvärde)
  # Boxplot with the upper fences overlaid: yellow = mild, red = extreme.
  p = ggplot(data_vis, aes(y = Mätvärde, x = i)) +
    geom_boxplot(outlier.colour = "red") +
    geom_hline(yintercept = mild.threshold.upper, colour = "yellow") +
    geom_hline(yintercept = extreme.threshold.upper, colour = "red") +
    ggtitle(label = i) +
    theme_bw()
  print(p)
}
# Picoplankton: range, quartiles, Tukey fences and a boxplot for each
# parameter reported under this data type.
datatype = readfile %>%
  filter(Datatyp == "Picoplankton") %>%
  select(Parameter, Mätvärde)
message("Picoplankton")
for (i in unique(datatype$Parameter)) {
  # Measured values for the current parameter, extracted once and reused.
  values = datatype$Mätvärde[which(datatype$Parameter == i)]
  print(range(values))
  # Default quantile() output: elements 2 and 4 are the 25% and 75% quartiles.
  quartiles = quantile(values)
  print(quartiles)
  lowerq = quartiles[2]
  upperq = quartiles[4]
  iqr = upperq - lowerq
  # Fences at 1.5 * IQR (mild) and 3 * IQR (extreme) beyond the quartiles.
  mild.threshold.upper = (iqr * 1.5) + upperq
  mild.threshold.lower = lowerq - (iqr * 1.5)
  extreme.threshold.upper = (iqr * 3) + upperq
  extreme.threshold.lower = lowerq - (iqr * 3)
  print(data.frame(
    PARAMETER = i,
    mild.threshold.upper,
    mild.threshold.lower,
    extreme.threshold.upper,
    extreme.threshold.lower
  ))
  data_vis = datatype %>%
    filter(Parameter == i) %>%
    select(Mätvärde)
  # Boxplot with the upper fences overlaid: yellow = mild, red = extreme.
  p = ggplot(data_vis, aes(y = Mätvärde, x = i)) +
    geom_boxplot(outlier.colour = "red") +
    geom_hline(yintercept = mild.threshold.upper, colour = "yellow") +
    geom_hline(yintercept = extreme.threshold.upper, colour = "red") +
    ggtitle(label = i) +
    theme_bw()
  print(p)
}
# Zooplankton: range, quartiles, Tukey fences and a boxplot for each
# parameter reported under this data type.
datatype = readfile %>%
  filter(Datatyp == "Zooplankton") %>%
  select(Parameter, Mätvärde)
message("Zooplankton")
for (i in unique(datatype$Parameter)) {
  # Measured values for the current parameter, extracted once and reused.
  values = datatype$Mätvärde[which(datatype$Parameter == i)]
  print(range(values))
  # Default quantile() output: elements 2 and 4 are the 25% and 75% quartiles.
  quartiles = quantile(values)
  print(quartiles)
  lowerq = quartiles[2]
  upperq = quartiles[4]
  iqr = upperq - lowerq
  # Fences at 1.5 * IQR (mild) and 3 * IQR (extreme) beyond the quartiles.
  mild.threshold.upper = (iqr * 1.5) + upperq
  mild.threshold.lower = lowerq - (iqr * 1.5)
  extreme.threshold.upper = (iqr * 3) + upperq
  extreme.threshold.lower = lowerq - (iqr * 3)
  print(data.frame(
    PARAMETER = i,
    mild.threshold.upper,
    mild.threshold.lower,
    extreme.threshold.upper,
    extreme.threshold.lower
  ))
  data_vis = datatype %>%
    filter(Parameter == i) %>%
    select(Mätvärde)
  # Boxplot with the upper fences overlaid: yellow = mild, red = extreme.
  p = ggplot(data_vis, aes(y = Mätvärde, x = i)) +
    geom_boxplot(outlier.colour = "red") +
    geom_hline(yintercept = mild.threshold.upper, colour = "yellow") +
    geom_hline(yintercept = extreme.threshold.upper, colour = "red") +
    ggtitle(label = i) +
    theme_bw()
  print(p)
}
# Phytoplankton: range, quartiles, Tukey fences and a boxplot for each
# parameter reported under this data type.
datatype = readfile %>%
  filter(Datatyp == "Phytoplankton") %>%
  select(Parameter, Mätvärde)
message("Phytoplankton")
for (i in unique(datatype$Parameter)) {
  # Measured values for the current parameter, extracted once and reused.
  values = datatype$Mätvärde[which(datatype$Parameter == i)]
  print(range(values))
  # Default quantile() output: elements 2 and 4 are the 25% and 75% quartiles.
  quartiles = quantile(values)
  print(quartiles)
  lowerq = quartiles[2]
  upperq = quartiles[4]
  iqr = upperq - lowerq
  # Fences at 1.5 * IQR (mild) and 3 * IQR (extreme) beyond the quartiles.
  mild.threshold.upper = (iqr * 1.5) + upperq
  mild.threshold.lower = lowerq - (iqr * 1.5)
  extreme.threshold.upper = (iqr * 3) + upperq
  extreme.threshold.lower = lowerq - (iqr * 3)
  print(data.frame(
    PARAMETER = i,
    mild.threshold.upper,
    mild.threshold.lower,
    extreme.threshold.upper,
    extreme.threshold.lower
  ))
  data_vis = datatype %>%
    filter(Parameter == i) %>%
    select(Mätvärde)
  # Boxplot with the upper fences overlaid: yellow = mild, red = extreme.
  p = ggplot(data_vis, aes(y = Mätvärde, x = i)) +
    geom_boxplot(outlier.colour = "red") +
    geom_hline(yintercept = mild.threshold.upper, colour = "yellow") +
    geom_hline(yintercept = extreme.threshold.upper, colour = "red") +
    ggtitle(label = i) +
    theme_bw()
  print(p)
}
# Primary production: range, quartiles, Tukey fences and a boxplot for each
# parameter reported under this data type.
datatype = readfile %>%
  filter(Datatyp == "Primary production") %>%
  select(Parameter, Mätvärde)
message("Primary production")
for (i in unique(datatype$Parameter)) {
  # Measured values for the current parameter, extracted once and reused.
  values = datatype$Mätvärde[which(datatype$Parameter == i)]
  print(range(values))
  # Default quantile() output: elements 2 and 4 are the 25% and 75% quartiles.
  quartiles = quantile(values)
  print(quartiles)
  lowerq = quartiles[2]
  upperq = quartiles[4]
  iqr = upperq - lowerq
  # Fences at 1.5 * IQR (mild) and 3 * IQR (extreme) beyond the quartiles.
  mild.threshold.upper = (iqr * 1.5) + upperq
  mild.threshold.lower = lowerq - (iqr * 1.5)
  extreme.threshold.upper = (iqr * 3) + upperq
  extreme.threshold.lower = lowerq - (iqr * 3)
  print(data.frame(
    PARAMETER = i,
    mild.threshold.upper,
    mild.threshold.lower,
    extreme.threshold.upper,
    extreme.threshold.lower
  ))
  data_vis = datatype %>%
    filter(Parameter == i) %>%
    select(Mätvärde)
  # Boxplot with the upper fences overlaid: yellow = mild, red = extreme.
  p = ggplot(data_vis, aes(y = Mätvärde, x = i)) +
    geom_boxplot(outlier.colour = "red") +
    geom_hline(yintercept = mild.threshold.upper, colour = "yellow") +
    geom_hline(yintercept = extreme.threshold.upper, colour = "red") +
    ggtitle(label = i) +
    theme_bw()
  print(p)
}
# Seal pathology: range, quartiles, Tukey fences and a boxplot for each
# parameter reported under this data type.
datatype = readfile %>%
  filter(Datatyp == "Seal pathology") %>%
  select(Parameter, Mätvärde)
message("Seal pathology")
for (i in unique(datatype$Parameter)) {
  # Measured values for the current parameter, extracted once and reused.
  values = datatype$Mätvärde[which(datatype$Parameter == i)]
  print(range(values))
  # Default quantile() output: elements 2 and 4 are the 25% and 75% quartiles.
  quartiles = quantile(values)
  print(quartiles)
  lowerq = quartiles[2]
  upperq = quartiles[4]
  iqr = upperq - lowerq
  # Fences at 1.5 * IQR (mild) and 3 * IQR (extreme) beyond the quartiles.
  mild.threshold.upper = (iqr * 1.5) + upperq
  mild.threshold.lower = lowerq - (iqr * 1.5)
  extreme.threshold.upper = (iqr * 3) + upperq
  extreme.threshold.lower = lowerq - (iqr * 3)
  print(data.frame(
    PARAMETER = i,
    mild.threshold.upper,
    mild.threshold.lower,
    extreme.threshold.upper,
    extreme.threshold.lower
  ))
  data_vis = datatype %>%
    filter(Parameter == i) %>%
    select(Mätvärde)
  # Boxplot with the upper fences overlaid: yellow = mild, red = extreme.
  p = ggplot(data_vis, aes(y = Mätvärde, x = i)) +
    geom_boxplot(outlier.colour = "red") +
    geom_hline(yintercept = mild.threshold.upper, colour = "yellow") +
    geom_hline(yintercept = extreme.threshold.upper, colour = "red") +
    ggtitle(label = i) +
    theme_bw()
  print(p)
}
# Epibenthos: range, quartiles, Tukey fences and a boxplot for each
# parameter reported under this data type.
datatype = readfile %>%
  filter(Datatyp == "Epibenthos") %>%
  select(Parameter, Mätvärde)
message("Epibenthos")
for (i in unique(datatype$Parameter)) {
  # Measured values for the current parameter, extracted once and reused.
  values = datatype$Mätvärde[which(datatype$Parameter == i)]
  print(range(values))
  # Default quantile() output: elements 2 and 4 are the 25% and 75% quartiles.
  quartiles = quantile(values)
  print(quartiles)
  lowerq = quartiles[2]
  upperq = quartiles[4]
  iqr = upperq - lowerq
  # Fences at 1.5 * IQR (mild) and 3 * IQR (extreme) beyond the quartiles.
  mild.threshold.upper = (iqr * 1.5) + upperq
  mild.threshold.lower = lowerq - (iqr * 1.5)
  extreme.threshold.upper = (iqr * 3) + upperq
  extreme.threshold.lower = lowerq - (iqr * 3)
  print(data.frame(
    PARAMETER = i,
    mild.threshold.upper,
    mild.threshold.lower,
    extreme.threshold.upper,
    extreme.threshold.lower
  ))
  data_vis = datatype %>%
    filter(Parameter == i) %>%
    select(Mätvärde)
  # Boxplot with the upper fences overlaid: yellow = mild, red = extreme.
  p = ggplot(data_vis, aes(y = Mätvärde, x = i)) +
    geom_boxplot(outlier.colour = "red") +
    geom_hline(yintercept = mild.threshold.upper, colour = "yellow") +
    geom_hline(yintercept = extreme.threshold.upper, colour = "red") +
    ggtitle(label = i) +
    theme_bw()
  print(p)
}
# Harbour seal: range, quartiles, Tukey fences and a boxplot for each
# parameter reported under this data type.
datatype = readfile %>%
  filter(Datatyp == "Harbour seal") %>%
  select(Parameter, Mätvärde)
message("Harbour seal")
for (i in unique(datatype$Parameter)) {
  # Measured values for the current parameter, extracted once and reused.
  values = datatype$Mätvärde[which(datatype$Parameter == i)]
  print(range(values))
  # Default quantile() output: elements 2 and 4 are the 25% and 75% quartiles.
  quartiles = quantile(values)
  print(quartiles)
  lowerq = quartiles[2]
  upperq = quartiles[4]
  iqr = upperq - lowerq
  # Fences at 1.5 * IQR (mild) and 3 * IQR (extreme) beyond the quartiles.
  mild.threshold.upper = (iqr * 1.5) + upperq
  mild.threshold.lower = lowerq - (iqr * 1.5)
  extreme.threshold.upper = (iqr * 3) + upperq
  extreme.threshold.lower = lowerq - (iqr * 3)
  print(data.frame(
    PARAMETER = i,
    mild.threshold.upper,
    mild.threshold.lower,
    extreme.threshold.upper,
    extreme.threshold.lower
  ))
  data_vis = datatype %>%
    filter(Parameter == i) %>%
    select(Mätvärde)
  # Boxplot with the upper fences overlaid: yellow = mild, red = extreme.
  p = ggplot(data_vis, aes(y = Mätvärde, x = i)) +
    geom_boxplot(outlier.colour = "red") +
    geom_hline(yintercept = mild.threshold.upper, colour = "yellow") +
    geom_hline(yintercept = extreme.threshold.upper, colour = "red") +
    ggtitle(label = i) +
    theme_bw()
  print(p)
}
# Grey seal: range, quartiles, Tukey fences and a boxplot for each
# parameter reported under this data type.
datatype = readfile %>%
  filter(Datatyp == "Grey seal") %>%
  select(Parameter, Mätvärde)
message("Grey seal")
for (i in unique(datatype$Parameter)) {
  # Measured values for the current parameter, extracted once and reused.
  values = datatype$Mätvärde[which(datatype$Parameter == i)]
  print(range(values))
  # Default quantile() output: elements 2 and 4 are the 25% and 75% quartiles.
  quartiles = quantile(values)
  print(quartiles)
  lowerq = quartiles[2]
  upperq = quartiles[4]
  iqr = upperq - lowerq
  # Fences at 1.5 * IQR (mild) and 3 * IQR (extreme) beyond the quartiles.
  mild.threshold.upper = (iqr * 1.5) + upperq
  mild.threshold.lower = lowerq - (iqr * 1.5)
  extreme.threshold.upper = (iqr * 3) + upperq
  extreme.threshold.lower = lowerq - (iqr * 3)
  print(data.frame(
    PARAMETER = i,
    mild.threshold.upper,
    mild.threshold.lower,
    extreme.threshold.upper,
    extreme.threshold.lower
  ))
  data_vis = datatype %>%
    filter(Parameter == i) %>%
    select(Mätvärde)
  # Boxplot with the upper fences overlaid: yellow = mild, red = extreme.
  p = ggplot(data_vis, aes(y = Mätvärde, x = i)) +
    geom_boxplot(outlier.colour = "red") +
    geom_hline(yintercept = mild.threshold.upper, colour = "yellow") +
    geom_hline(yintercept = extreme.threshold.upper, colour = "red") +
    ggtitle(label = i) +
    theme_bw()
  print(p)
}
# Zoobenthos: range, quartiles, Tukey fences and a boxplot for each
# parameter reported under this data type.
datatype = readfile %>%
  filter(Datatyp == "Zoobenthos") %>%
  select(Parameter, Mätvärde)
message("Zoobenthos")
for (i in unique(datatype$Parameter)) {
  # Measured values for the current parameter, extracted once and reused.
  values = datatype$Mätvärde[which(datatype$Parameter == i)]
  print(range(values))
  # Default quantile() output: elements 2 and 4 are the 25% and 75% quartiles.
  quartiles = quantile(values)
  print(quartiles)
  lowerq = quartiles[2]
  upperq = quartiles[4]
  iqr = upperq - lowerq
  # Fences at 1.5 * IQR (mild) and 3 * IQR (extreme) beyond the quartiles.
  mild.threshold.upper = (iqr * 1.5) + upperq
  mild.threshold.lower = lowerq - (iqr * 1.5)
  extreme.threshold.upper = (iqr * 3) + upperq
  extreme.threshold.lower = lowerq - (iqr * 3)
  print(data.frame(
    PARAMETER = i,
    mild.threshold.upper,
    mild.threshold.lower,
    extreme.threshold.upper,
    extreme.threshold.lower
  ))
  data_vis = datatype %>%
    filter(Parameter == i) %>%
    select(Mätvärde)
  # Boxplot with the upper fences overlaid: yellow = mild, red = extreme.
  p = ggplot(data_vis, aes(y = Mätvärde, x = i)) +
    geom_boxplot(outlier.colour = "red") +
    geom_hline(yintercept = mild.threshold.upper, colour = "yellow") +
    geom_hline(yintercept = extreme.threshold.upper, colour = "red") +
    ggtitle(label = i) +
    theme_bw()
  print(p)
}
# Ringed seal: range, quartiles, Tukey fences and a boxplot for each
# parameter reported under this data type.
datatype = readfile %>%
  filter(Datatyp == "Ringed seal") %>%
  select(Parameter, Mätvärde)
message("Ringed seal")
for (i in unique(datatype$Parameter)) {
  # Measured values for the current parameter, extracted once and reused.
  values = datatype$Mätvärde[which(datatype$Parameter == i)]
  print(range(values))
  # Default quantile() output: elements 2 and 4 are the 25% and 75% quartiles.
  quartiles = quantile(values)
  print(quartiles)
  lowerq = quartiles[2]
  upperq = quartiles[4]
  iqr = upperq - lowerq
  # Fences at 1.5 * IQR (mild) and 3 * IQR (extreme) beyond the quartiles.
  mild.threshold.upper = (iqr * 1.5) + upperq
  mild.threshold.lower = lowerq - (iqr * 1.5)
  extreme.threshold.upper = (iqr * 3) + upperq
  extreme.threshold.lower = lowerq - (iqr * 3)
  print(data.frame(
    PARAMETER = i,
    mild.threshold.upper,
    mild.threshold.lower,
    extreme.threshold.upper,
    extreme.threshold.lower
  ))
  data_vis = datatype %>%
    filter(Parameter == i) %>%
    select(Mätvärde)
  # Boxplot with the upper fences overlaid: yellow = mild, red = extreme.
  p = ggplot(data_vis, aes(y = Mätvärde, x = i)) +
    geom_boxplot(outlier.colour = "red") +
    geom_hline(yintercept = mild.threshold.upper, colour = "yellow") +
    geom_hline(yintercept = extreme.threshold.upper, colour = "red") +
    ggtitle(label = i) +
    theme_bw()
  print(p)
}
# Sedimentation: range, quartiles, Tukey fences and a boxplot for each
# parameter reported under this data type.
datatype = readfile %>%
  filter(Datatyp == "Sedimentation") %>%
  select(Parameter, Mätvärde)
message("Sedimentation")
for (i in unique(datatype$Parameter)) {
  # Measured values for the current parameter, extracted once and reused.
  values = datatype$Mätvärde[which(datatype$Parameter == i)]
  print(range(values))
  # Default quantile() output: elements 2 and 4 are the 25% and 75% quartiles.
  quartiles = quantile(values)
  print(quartiles)
  lowerq = quartiles[2]
  upperq = quartiles[4]
  iqr = upperq - lowerq
  # Fences at 1.5 * IQR (mild) and 3 * IQR (extreme) beyond the quartiles.
  mild.threshold.upper = (iqr * 1.5) + upperq
  mild.threshold.lower = lowerq - (iqr * 1.5)
  extreme.threshold.upper = (iqr * 3) + upperq
  extreme.threshold.lower = lowerq - (iqr * 3)
  print(data.frame(
    PARAMETER = i,
    mild.threshold.upper,
    mild.threshold.lower,
    extreme.threshold.upper,
    extreme.threshold.lower
  ))
  data_vis = datatype %>%
    filter(Parameter == i) %>%
    select(Mätvärde)
  # Boxplot with the upper fences overlaid: yellow = mild, red = extreme.
  p = ggplot(data_vis, aes(y = Mätvärde, x = i)) +
    geom_boxplot(outlier.colour = "red") +
    geom_hline(yintercept = mild.threshold.upper, colour = "yellow") +
    geom_hline(yintercept = extreme.threshold.upper, colour = "red") +
    ggtitle(label = i) +
    theme_bw()
  print(p)
}
# Harbour Porpoise: range, quartiles, Tukey fences and a boxplot for each
# parameter reported under this data type.
datatype = readfile %>%
  filter(Datatyp == "Harbour Porpoise") %>%
  select(Parameter, Mätvärde)
message("Harbour Porpoise")
for (i in unique(datatype$Parameter)) {
  # Measured values for the current parameter, extracted once and reused.
  values = datatype$Mätvärde[which(datatype$Parameter == i)]
  print(range(values))
  # Default quantile() output: elements 2 and 4 are the 25% and 75% quartiles.
  quartiles = quantile(values)
  print(quartiles)
  lowerq = quartiles[2]
  upperq = quartiles[4]
  iqr = upperq - lowerq
  # Fences at 1.5 * IQR (mild) and 3 * IQR (extreme) beyond the quartiles.
  mild.threshold.upper = (iqr * 1.5) + upperq
  mild.threshold.lower = lowerq - (iqr * 1.5)
  extreme.threshold.upper = (iqr * 3) + upperq
  extreme.threshold.lower = lowerq - (iqr * 3)
  print(data.frame(
    PARAMETER = i,
    mild.threshold.upper,
    mild.threshold.lower,
    extreme.threshold.upper,
    extreme.threshold.lower
  ))
  data_vis = datatype %>%
    filter(Parameter == i) %>%
    select(Mätvärde)
  # Boxplot with the upper fences overlaid: yellow = mild, red = extreme.
  p = ggplot(data_vis, aes(y = Mätvärde, x = i)) +
    geom_boxplot(outlier.colour = "red") +
    geom_hline(yintercept = mild.threshold.upper, colour = "yellow") +
    geom_hline(yintercept = extreme.threshold.upper, colour = "red") +
    ggtitle(label = i) +
    theme_bw()
  print(p)
}
# Date time
Sys.time()
# Here we store the session info for this script
sessioninfo::session_info()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.