Nothing
### KNN FUNCTION
KNN_SUM <- function(dataset, k=5){
dataset <- as.matrix(dataset)
n <- nrow(dataset)
if(!is.numeric(k)){
stop('k input must be numeric')
}
if(k>n||k<1){
stop('k input must be less than number of observations and greater than 0')
}
if(!is.numeric(dataset)){
stop('dataset input is not numeric')
}
dist.obj <- dbscan::kNN(dataset, k)
knnSum <- NULL
knnSum <- apply(dist.obj$dist, 1, sum)
return(knnSum)
} #requires dbscan
### KNN AGGREGATE
KNN_AGG <- function(dataset, k_min=5, k_max=10){
kRange <- k_min:k_max
n <- nrow(dataset)
dataset <- as.matrix(dataset)
if(!is.numeric(k_min)|!is.numeric(k_max)){
stop('k_min and k_max input must be numeric')
}
if(k_max>n||k_min<1){
stop('k_min input must be less than number of observations and greater than 0')
}
if(!is.numeric(dataset)){
stop('dataset input is not numeric')
}
dist.obj <- dbscan::kNN(dataset, k_max)
aggDist <- NULL
for(i in 1:n){
kDist <- NULL
for(j in 1:length(kRange)){
kDist[j] <- sum(dist.obj$dist[i,1:kRange[j]])
}
aggDist[i] <- sum(kDist)
}
return(aggDist)
} #requires dbscan
### KNN IN-DEGREE
KNN_IN <- function(dataset, k=5){
n <- nrow(dataset)
dataset <- as.matrix(dataset)
if(!is.numeric(k)){
stop('k input must be numeric')
}
if(k>n||k<1){
stop('k input must be less than number of observations and greater than 0')
}
if(!is.numeric(dataset)){
stop('dataset input is not numeric')
}
dist.obj <- dbscan::kNN(dataset, k)
inDegreeVector <- as.vector(dist.obj$id)
inDegreeVector <- tabulate(inDegreeVector)
return(inDegreeVector)
} #requires dbscan
### DB FUNCTION
DB <- function(dataset, d=1, fraction=0.05){
n <- nrow(dataset)
dataset <- as.matrix(dataset)
distMatrix <- as.matrix(dist(dataset))
if(!is.numeric(dataset)){
stop('dataset input is not numeric')
}
if(!is.numeric(d)||!is.numeric(fraction)){
stop('all inputs (d, fraction) must be numeric')
}
dist.obj <- t(sapply(1:n, function(i){sort(distMatrix[i,])}))
neighborhood <- apply(dist.obj[,-1], 2, function(x)x<d)
neighborhood <- apply(neighborhood, 1, sum)
threshold <- n*fraction
classification <- neighborhood
classification[neighborhood<threshold] <- 'Outlier'
classification[neighborhood>=threshold] <- 'Inlier'
return_list <- list(neighbors=neighborhood, classification=classification)
return(return_list)
} #requires dbscan
### LOCI FUNCTION
LOCI <- function(dataset, alpha=0.5, nn=20, k=3){
n <- nrow(dataset)
dataset <- as.matrix(dataset)
distMatrix <- as.matrix(dist(dataset))
if(!is.numeric(k)|!is.numeric(alpha)|!is.numeric(nn)){
stop('all input parameters alpha, nn and k must be numeric')
}
if(k>n||k<1){
stop('k input must be less than number of observations and greater than 0')
}
if(!is.numeric(dataset)){
stop('dataset input is not numeric')
}
npar_pi <- NULL
avg_npar <- NULL
MDEF <- NULL
sd_npar <- NULL
norm_MDEF <- NULL
class <- NULL
for(i in 1:nrow(distMatrix)){
sortVector <- sort(distMatrix[i,])
knn <- sortVector[1:nn]
radius <- knn[[nn]]*alpha
npar <- NULL
for(j in 1:length(knn)){
np_obs <- names(knn)[j]
np_obs_allNN <- distMatrix[np_obs,]
np_obs_radiusNN <- length(np_obs_allNN[which(np_obs_allNN<=radius)])
npar[j] <- np_obs_radiusNN
}
npar_pi[i] <- length(knn[which(knn<=radius)])
avg_npar[i] <- (sum(npar)/nn)
sd_npar[i] <- sd(npar)
MDEF[i] <- 1-(npar_pi[i]/avg_npar[i])
norm_MDEF[i] <- sd_npar[i]/avg_npar[i]
if((MDEF[i]>(k*norm_MDEF[i]))==TRUE){
class[i] <- 'Outlier'
} else
class[i] <- 'Inlier'
}
returnList <- list(npar_pi=npar_pi, avg_npar=avg_npar, sd_npar=sd_npar, MDEF=MDEF, norm_MDEF=norm_MDEF, class=class)
return(returnList)
}
### INFLO FUNCTION
INFLO <- function(dataset, k=5){
n <- nrow(dataset)
dataset <- as.matrix(dataset)
if(!is.numeric(k)){
stop('k input must be numeric')
}
if(k>n||k<1){
stop('k input must be less than number of observations and greater than 0')
}
if(!is.numeric(dataset)){
stop('dataset input is not numeric')
}
dist.obj <- dbscan::kNN(dataset, k)
obsDensity <- apply(dist.obj$dist, 1, function(x){1/max(x)})
RNN <- matrix(data=NA, nrow=nrow(dataset), ncol = 1)
avgDensityInfluSpace <- matrix(data=NA, nrow=nrow(dataset), ncol = 1)
INFLO <- NULL
for(i in 1:n){
influSpace <- as.vector(which(dist.obj$id==i, arr.ind = TRUE)[,1])
if(length(influSpace)==0){
RNN[i] <- k
influSpace <- dist.obj$id[i,]
} else {
RNN[i] <- length(influSpace)
}
sumRNNobsDensity <- NULL
for(j in 1:length(influSpace)){
RNNobsDensity <- obsDensity[influSpace[j]]
sumRNNobsDensity[j] <- RNNobsDensity
}
avgDensityInfluSpace[i] <- sum(sumRNNobsDensity)/RNN[i]
INFLO[i] <- avgDensityInfluSpace[i]/obsDensity[i]
}
return(INFLO)
} #requires dbscan
### COF FUNCTION
COF <- function(dataset, k=5){
distMatrix <- as.matrix(dist(dataset))
SBNpathIndex <- list()
acDistMatrix <- matrix(data=NA, nrow = nrow(dataset), ncol = 1)
for(i in 1:nrow(dataset)){
nnVector <- distMatrix[i,]
sVector <- sort(nnVector)
SBNpath <- as.double(names(sVector[1:(1+k)]))
SBNpathIndex[[i]] <- SBNpath[2:(1+k)]
#cost description
costDesc <- NULL
for(j in 1:k){
distance <- distMatrix[SBNpath,SBNpath]
costDesc[j] <- min(distance[j+1, 1:j])
}
#Avg. chaining distance
acDistVector <- NULL
for(h in 1:length(costDesc)){
acDistVector[h] <- ((2*(k+1-h))/(k*(k+1)))*costDesc[h]
}
acDistMatrix[i,] <- sum(acDistVector)
}
#COF score
COF <- NULL
for(g in 1:nrow(dataset)){
acDistOBS <- acDistMatrix[g,]*k
acDistNN <- sum(acDistMatrix[SBNpathIndex[[g]],])
COF[g] <- acDistOBS/acDistNN
}
return(COF)
}
### LOOP FUNCTION
LOOP <- function(dataset, k=5, lambda=3){
n <- nrow(dataset)
dataset <- as.matrix(dataset)
if(!is.numeric(k)){
stop('k input must be numeric')
}
if(k>=n||k<1){
stop('k input must be less than number of observations and greater than 0')
}
if(!is.numeric(lambda)){
stop('lambda input must be numeric')
}
if(!is.numeric(dataset)){
stop('dataset input is not numeric')
}
dist.obj <- dbscan::kNN(dataset, k)
nnSD <- apply(dist.obj$dist, 1, function(x){sqrt((sum(x^2)/k))})
pdist <- lambda*nnSD
plof <- NULL
for(i in 1:n){
plof[i] <- (nnSD[i]/mean(nnSD[dist.obj$id[i,]]))-1
}
nplof <- lambda*sqrt(sum(plof^2)/n)
loop <- pracma::erf(plof/(nplof*sqrt(2)))
loop[loop<0] <- 0
return(loop)
} #requires pracma & dbscan
### LOF FUNCTION
LOF <- function(dataset, k=5){
n <- nrow(dataset)
dataset <- as.matrix(dataset)
if(!is.numeric(k)){
stop('k input must be numeric')
}
if(k>=n||k<1){
stop('k input must be less than number of observations and greater than 0')
}
if(!is.numeric(dataset)){
stop('dataset input is not numeric')
}
dist.obj <- dbscan::kNN(dataset, k)
lrd <- NULL
LOF <- NULL
for(i in 1:n){
k_dist <- dist.obj$dist[dist.obj$id[i,], k]
dist_po <- dist.obj$dist[i,]
reach_dist <- apply(cbind(k_dist, dist_po), 1, max)
lrd[i] <- 1/(sum(reach_dist)/k)
}
for(i in 1:n){
LOF[i] <- (sum(lrd[dist.obj$id[i,]]/k))/lrd[i]
}
return(LOF)
} #requires dbscan
### LDOF FUNCTION
LDOF <- function(dataset, k=5){
n <- nrow(dataset)
dataset <- as.matrix(dataset)
if(!is.numeric(k)){
stop('k input must be numeric')
}
if(k>=n||k<1){
stop('k input must be less than number of observations and greater than 0')
}
if(!is.numeric(dataset)){
stop('dataset input is not numeric')
}
distMatrix <- as.matrix(dist(dataset))
LDOF <- NULL
for(i in 1:n){
vector <- distMatrix[i,]
sVector <- sort(vector)
np <- as.numeric(names(sVector))[2:(k+1)]
dxp <- sum(sVector[2:(k+1)])/k
Dxp <- sum(distMatrix[np,np])/(k*(k-1))
LDOF[i] <- dxp/Dxp
}
return(LDOF)
}
### RDOF FUNCTION
RDOS <- function(dataset, k=5, h=1){
n <- nrow(dataset)
d <- ncol(dataset)
dataset <- as.matrix(dataset)
if(!is.numeric(k)){
stop('k input must be numeric')
}
if(k>=n||k<1){
stop('k input must be less than number of observations and greater than 0')
}
if(!is.numeric(h)){
stop('h input must be numeric')
}
if(!is.numeric(dataset)){
stop('dataset input is not numeric')
}
distMatrix <- as.matrix(dist(dataset))
dist.obj <- dbscan::kNN(dataset, k)
#sNN matrix
func.dist <- function(x1, x2) {
length(intersect(x1, x2))
}
sNN_matrix <- as.matrix(proxy::dist(x = dist.obj$id, method = func.dist, diag = T, upper = T))
neighborhood <- list()
#neighborhood loop
for(i in 1:n){
kNN <- dist.obj$id[i,]
rNN <- as.numeric(which(dist.obj$id==i, arr.ind = TRUE)[,1])
sNN <- as.numeric(names(sNN_matrix[i,][sNN_matrix[i,]>0]))
neighborhood[[i]] <- union(kNN, c(rNN, sNN))
}
px <- NULL
#gaussian kernel loop
for(i in 1:n){
Kgaussian <- 1/((2*pi)^(d/2))*exp(-((distMatrix[i, neighborhood[[i]]])/(2*h^2)))
px[i] <- (1/(length(neighborhood[[i]])+1))*sum((1/(h^d))*Kgaussian)
}
RDOS <- NULL
#RDOS
for(i in 1:n){
RDOS[i] <- (sum(px[neighborhood[[i]]]))/(length(neighborhood[[i]])*px[i])
}
return(RDOS)
} #requires dbscan & proxy
### LDF FUNCTION
LDF <- function(dataset, k=5, h=1, c=1){
n <- nrow(dataset)
dim <- ncol(dataset)
dataset <- as.matrix(dataset)
if(!is.numeric(k)||!is.numeric(h)||!is.numeric(c)){
stop('all inputs (k, h, c) must be numeric')
}
if(k>=n||k<1){
stop('k input must be less than number of observations and greater than 0')
}
if(!is.numeric(dataset)){
stop('dataset input is not numeric')
}
dist.obj <- dbscan::kNN(dataset, k)
LDE <- NULL
for(i in 1:n){
k_dist <- dist.obj$dist[dist.obj$id[i,],k]
dist_po <- dist.obj$dist[i,]
reach_dist <- apply(cbind(k_dist, dist_po), 1, max)
kernel <- 1/((((2*pi)^(dim/2)))*((h*k_dist)^dim))*exp(-((reach_dist^2)/(2*((h*k_dist)^2))))
LDE[i] <- (1/k)*sum(kernel)
}
LDF <- NULL
for(i in 1:n){
LDF[i] <- sum(LDE[dist.obj$id[i,]]/k)/(LDE[i]+(c*sum(LDE[dist.obj$id[i,]]/k)))
}
return_list <- list(LDE=LDE, LDF=LDF)
return(return_list)
} #requires dbscan
### RKOF FUNCTION
RKOF <- function(dataset, k=5, C=1, alpha=1, sigma2=1){
n <- nrow(dataset)
dim <- ncol(dataset)
dataset <- as.matrix(dataset)
if(!is.numeric(k)||!is.numeric(C)||!is.numeric(alpha)||!is.numeric(sigma2)){
stop('all inputs (k, C, alpha, sigma2) must be numeric')
}
if(k>=n||k<1){
stop('k input must be less than number of observations and greater than 0')
}
if(!is.numeric(dataset)){
stop('dataset input is not numeric')
}
kde <- NULL
wde <- NULL
weight <- list()
dist.obj <- dbscan::kNN(dataset, k)
for(i in 1:n){
k_dist <- dist.obj$dist[dist.obj$id[i,], k]
dist_po <- dist.obj$dist[i,]
fx <- 1/k_dist
kde[i] <- sum((1/((C*k_dist)^alpha)^2)*(1/(2*pi)^(dim/2))*exp(-(dist_po^2/((C*k_dist)^alpha)^2)))/k
weight[[i]] <- exp(-(((k_dist/min(k_dist))-1)^2/(2*sigma2)))
}
RKOF <- NULL
for(i in 1:n){
k_dist <- dist.obj$dist[dist.obj$id[i,], k]
dist_po <- dist.obj$dist[i,]
wde[i] <- sum(weight[[i]]*kde[dist.obj$id[i,]])/sum(weight[[i]])
}
RKOF <- wde/kde
return(RKOF)
} #requires dbscan
### KDEOS FUNCTION
KDEOS <- function(dataset, k_min=5, k_max=10, eps=NULL){
n <- nrow(dataset)
dim <- ncol(dataset)
dataset <- as.matrix(dataset)
if(!is.numeric(k_min)||!is.numeric(k_max)){
stop('k_min & k_max input must both be numeric')
}
if(k_min>k_max){
stop('k_min input must be less or equal to k_max')
}
if(k_max>=n||k_max<1){
stop('k_max input must be less than number of observations and greater than 0')
}
if(!is.numeric(dataset)){
stop('dataset input is not numeric')
}
dist.obj <- dbscan::kNN(dataset, k_max)
Kgauss <- NULL
KDEmatrix <- matrix(nrow=n, ncol=length(k_min:k_max))
colnames(KDEmatrix) <- k_min:k_max
for(k in k_min:k_max){
for(i in 1:n){
dist_po <- dist.obj$dist[i,(1:k)]
h <- min(mean(dist_po), eps)
Kgauss <- 1/(((2*pi)^(dim/2))*(h^dim))*exp(-0.5*((dist_po)/h^2))
KDEmatrix[i,as.character(k)] <- (1/n)*sum(Kgauss)
}
}
s <- rep(0, n)
for(i in 1:n){
for(k in k_min:k_max){
KDEmean <- mean(KDEmatrix[dist.obj$id[i,(1:k)],as.character(k)])
KDEsd <- sd(KDEmatrix[dist.obj$id[i,(1:k)],as.character(k)])
KDEzscore <- (KDEmean-KDEmatrix[i,as.character(k)])/KDEsd
s[i] <- s[i]+KDEzscore
}
}
s <- s/(k_max-k_min+1)
cummulativeScores <- ecdf(s)
KDEOS <- cummulativeScores(s)
return(KDEOS)
} #requires dbscan
### NOF FUNCTION
NOF <- function(dataset){
dataset <- as.matrix(dataset)
if(!is.numeric(dataset)){
stop('dataset input is not numeric')
}
n <- nrow(dataset)
r <- 1
nn <- ceiling(sqrt(n))
dist.obj <- dbscan::kNN(dataset, nn)
while(TRUE){
if(r>nn){
nn <- r + 10
dist.obj <- dbscan::kNN(dataset, nn)
}
nb_0 <- tabulate(dist.obj$id[,1:r])
numb <- length(nb_0[nb_0==0])
if(r==1){
numb_upd <- numb
}
if(r!=1 & numb_upd==numb){
break
}
numb_upd <- length(nb_0[nb_0==0])
r=r+1
print(paste('r is now:', r))
}
max_nb <- max(nb_0)
if(max_nb>nn){
dist.obj <- dbscan::kNN(dataset, max_nb)
}
rNN <- sapply(1:n, function(i){as.vector(which(dist.obj$id[,1:max_nb]==i, arr.ind = TRUE)[,1])})
NIS <- list()
for(i in 1:n){
NIS[[i]] <- union(rNN[[i]], dist.obj$id[i,])
}
lrd <- NULL
for(i in 1:n){
k_dist <- as.vector(dist.obj$dist[NIS[[i]],max_nb])
dist_po <- as.vector(apply(rbind(dataset[NIS[[i]],]), 1, function(x){sqrt(sum((x-dataset[i,])^2))}))
reach_dist <- apply(cbind(k_dist, dist_po), 1, max)
lrd[i] <- 1/(sum(reach_dist)/max_nb)
}
NOF <- NULL
for(i in 1:n){
NOF[i] <- sum(lrd[NIS[[i]]])/(length(NIS[[i]])*lrd[i])
}
return_list <- list(nb=nb_0, max_nb=max_nb, r=r, NOF=NOF)
return(return_list)
} #requires dbscan
### NAN FUNCTION
NAN <- function(dataset, NaN_Edges=FALSE){
dataset <- as.matrix(dataset)
if(!is.numeric(dataset)){
stop('dataset input is not numeric')
}
n <- nrow(dataset)
r <- 1
nn <- ceiling(sqrt(n))
dist.obj <- dbscan::kNN(dataset, nn)
while(TRUE){
if(r>nn){
nn <- r + 10
dist.obj <- dbscan::kNN(dataset, nn)
}
NaN_Num <- tabulate(dist.obj$id[,1:r])
NaN_Num_0 <- length(NaN_Num[NaN_Num==0])
if(r==1){
Nan_Num_0_Upd <- NaN_Num_0
}
if(r>1 & Nan_Num_0_Upd==NaN_Num_0){
break
}
Nan_Num_0_Upd <- length(NaN_Num[NaN_Num==0])
r=r+1
print(paste('r update:', r))
}
if(NaN_Edges==TRUE){
NNpairs <- cbind(rep(1:nrow(dataset),each=r), as.vector(t(dist.obj$id[,1:r])))
nLinks <- nrow(NNpairs)
func.pairs <- function(x, y){x %in% y}
pairsMatrix <-
sapply(1:nLinks, function(i)
sapply(1:nLinks, function(j) sum(func.pairs(NNpairs[i,], NNpairs[j,]))))
diag(pairsMatrix) <- 0
n_NaN <- sum(pairsMatrix[upper.tri(pairsMatrix)]==2)
NaN_Edges <- NNpairs[which(pairsMatrix==2, arr.ind = TRUE)[,2],]
} else {
n_NaN <- NULL
NaN_Edges <- NULL
}
return_list <- list(NaN_Num=NaN_Num, r=r, NaN_Edges=NaN_Edges, n_NaN=n_NaN)
return(return_list)
} #requires dbscan
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.