Nothing
# ----------------------------------------------------------------------------------------------
#' indmat
#'
#' @param groep.vec vector of grouping labels
#'
#' @return an indicator matrix.
#'
#' @noRd
indmat <- function (groep.vec)
{
elements <- levels(factor(groep.vec))
Y <- matrix(0, nrow = length(groep.vec), ncol = length(elements))
dimnames(Y) <- list(NULL, paste(elements))
for (i in 1:length(elements)) Y[groep.vec == elements[i], i] <- 1
return(Y)
}
# ----------------------------------------------------------------------------------------------
#' Get points of intersection between decision boundaries with plot frame
#'
#' @param x Matrix of points from which boundaries are to be drawn
#' @param bounds Bounds of plotting frame
#'
#' @return Matrix containing x-coo,y-coo, line a intersect with line b
#' @noRd
getintersects<-function(x,bounds){
eq<-matrix(c(0,bounds[3],NA,bounds[2],0,bounds[4],NA,bounds[1]),ncol=2,byrow=TRUE)
intersects<-matrix(ncol=2)
whatlines<-matrix(ncol=2)
n<-5
for(i in 1:(nrow(x)-1)){
for(j in (i+1):nrow(x)){
eq<-rbind(eq,equation(x[i,],x[j,]))
snypunte<-sol(eq[n,],eq[-n,])
countermat<-cbind(rep(n-4,length(nrow(snypunte))),snypunte[,3])
whatlines<-rbind(whatlines,countermat)
intersects<-rbind(intersects,snypunte[,-3])
n<-n+1
}
}
inside_x<-intersects[-1,1]>=(bounds[1]-0.0000001)&intersects[-1,1]<=(bounds[2]+0.0000001)
inside_y<-intersects[-1,2]>=(bounds[3]-0.0000001)&intersects[-1,2]<=(bounds[4]+0.0000001)
inside<-intersects[-1,]
inside<-inside[inside_x&inside_y,]
whatlines<-whatlines[-1,]
return(cbind(inside,whatlines[inside_x&inside_y,]))
}
#' Get equation of line with equal distance between two points
#'
#' @param p1 Point 1 vector
#' @param p2 Point 2 vector
#'
#' @return vector containing slope and intercept
#' @noRd
equation<-function(p1,p2){
#generalise to p-dimensional space. See the following
# link https://math.stackexchange.com/questions/987488/equation-for-a-plane-perpendicular-to-a-line-through-two-given-points#:~:text=If%20the%20plane%20is%20perpendicular,%E2%88%92z0)%3D0.
if(length(p1)>2){
#note corner cases not covered for as of yet
point<-(p1+p2)/2
n<-p1-p2 #normal vector
rhs<-sum(n*point) #n*p
a<-rhs/n[2]
b<--n[1]/n[2]
return(c(b,a))
}
if(p1[2]==p2[2]){
return(c(NA,(p1[1]+p2[1])/2))
}
point<-(p1+p2)/2
b<--1*(p1[1]-p2[1])/(p1[2]-p2[2])
#c=y-mx
a<-point[2]-b*point[1]
return(c(b,a))
}
#' Solve for point where two lines intersect
#'
#' @param v1 Vector containing slope and intercept of line
#' @param v2 Matrix of other lines' slopes and intercepts
#'
#' @return Matrix containing intersections with indicator third column (x,y,ind)
#' @noRd
sol<-function(v1,v2){
#v1 is vector containing slope and intercept >>>c(slope,intercept) of new line
#V2 is matrix containing slopes and intercepts of previous lines Col1=slope, col2=intercept
#This function solves for the point where two lines intersect
x<-numeric()
y<-numeric()
n<-numeric()
for(i in 1:nrow(v2)){
#make provision for lines that are both infinite slope
if(is.na(v2[i,1]) & is.na(v1[1])) next
if(is.na(v1[1])){
x<-append(x,v1[2])
y<-append(y,v2[i,2]+v2[i,1]*v1[2])
n<-append(n,i)
next
}
if(is.na(v2[i,1])){
x<-append(x,v2[i,2])
y<-append(y,v1[2]+v1[1]*v2[i,2])
n<-append(n,i)
next
}
if(v1[1]==v2[i,1]) next
x_coo<-(v2[i,2]-v1[2])/(v1[1]-v2[i,1])
x<-append(x,x_coo)
y<-append(y,v1[1]*x_coo+v1[2])
n<-append(n,i)
}
matrix(c(x,y,n),ncol=3)
}
#' Make a matrix containing line segments on the boundary
#'
#' Coordinates are repeated to make up little line segments
#'
#' @param x entire decision boundary with all intersections
#'
#' @return matrix containing segments on the boundary
#' @noRd
makemat<-function(x){
volgorde<-order(x[,1])
if(length(unique(x[,1]))==1){ #that is, line perfectly vertical, then sort by y
x<-x[!duplicated(round(x[,2],15)),] #round to 15 decimal places to ensure no repeat coordinates. Tuning paramater
volgorde<-order(x[,2])
}
else{ #sort by ascending x
x<-x[!duplicated(round(x[,1],15)),]
volgorde<-order(x[,1])
}
x<-x[volgorde,]
if(nrow(x)==2) return(x) #there is no intersections with any line other than edge
temp1<-matrix(x[1,],ncol=2,byrow=TRUE)
for(i in 2:(nrow(x)-1)){
temp1<-rbind(temp1,x[i,],x[i,])
}
temp1<-rbind(temp1, x[nrow(x),])
return(temp1)
}
#' Compute euclidean distance between two points
#'
#' @param x1 Vector (x,y)
#' @param x2 Vector (x,y). Can also generalise to p-dimensions. Function makes provision
#'
#' @return Euclidean distance - scalar
#' @noRd
euc.dist<- function(x1, x2) {
if(length(x2)>2)
x1<-c(x1,rep(0,length(x2-2)))
sqrt(sum((x1 - x2) ^ 2))
}
#' Obtain minimum two Euclidean distances
#'
#' Distance between a points and a matrix of points
#'
#' @param p1 Point 1 - vector (x,y)
#' @param p2 Matrix of points (x,y). Can generalise to have p columns as well. euc.dist sorts this bad boy out!
#'
#' @return indicator vector indicating which points in p2 are closest and second
#' closest to p1
#' @noRd
minEucDist <- function(p1,p2) {
distances<-apply(p2,MARGIN=1,euc.dist,x1=p1)
closest1<-which(distances==min(distances))
distances[closest1]<-NA
closest2<-which(distances==min(distances,na.rm = TRUE))
return(c(closest1,closest2))
}
#' Determine which parts of line segment constitutes a true boundary
#'
#' Have a line segment of different parts. This function tests each part if
#' it is in fact a separating line between the two classes contained in
#' points parameter
#'
#' @param seg Line segment - matrix [x,y]
#' @param original coordinates of class means - matrix [x,y]. Has been generalised to accept p columns as well.
#' @param points vector showing which two points the proposed line segment
#' is separating - ex (1,2)
#'
#' @return matrix containing coordinates of true boundaries on the line
#' @noRd
TrueBoundary<-function(seg,original,points){
n<-nrow(seg)
midpoints<-matrix(nrow=(nrow(seg)/2),ncol=2)
for(i in 1:(nrow(seg)/2)){
lil<-seg[(2*i-1):(2*i),]
midpoints[i,]<-c(mean(lil[,1]),mean(lil[,2]))
}
#okay so now need to see if it is true boundary or not
discard<-numeric()
for(i in 1:nrow(midpoints)){
close_point<-minEucDist(midpoints[i,],original)
if(!(close_point[1]%in% points & close_point[2]%in% points)){
discard<-append(discard,c(2*i-1,2*i))
}
}
if(length(discard)==0) return(seg)
remaining<-seg[-discard,]
if(nrow(remaining)==0) return(remaining)
#remove all the interior points on the straight line
if(length(unique(remaining[,1]))==1){ #that is, line perfectly vertical,
idx_1<-which(remaining[,2]==max(remaining[,2]))
idx_2<-which(remaining[,2]==min(remaining[,2]))
return(remaining[c(idx_1,idx_2),])
}
idx_1<-which(remaining[,1]==max(remaining[,1]))
idx_2<-which(remaining[,1]==min(remaining[,1]))
return(remaining[c(idx_1,idx_2),])
}
#' Make list containing matrix of segments on each line
#'
#' @param x Output obtained from getintersects function
#'
#' @return List containing matrices of segments
#' @noRd
makelistmat<-function(x){
x[,4]<-x[,4]-4
segments<-list()
for(i in unique(x[,3])){
segments[[i]]<-makemat(x[x[,3]==i | x[,4]==i,1:2])
}
return(segments)
}
#' Order the coordinates of a polygon such that it is circular
#'
#' @param x Polygons in a list
#' @param points Coordinates of class means
#'
#' @return List with polygons, coordinates ordered
#' @noRd
makeCircular<-function(x,points){
for(i in 1:length(x)){
#first center the points
cent_x<-x[[i]][,1]-points[i,1]
cent_y<-x[[i]][,2]-points[i,2]
thetas<-atan(cent_y/cent_x)
for(j in 1:length(thetas)){
if(cent_x[j]<0 & cent_y[j]>0)
thetas[j]<-pi+thetas[j]
if(cent_x[j]<0 & cent_y[j]<0)
thetas[j]<-pi+thetas[j]
if(cent_x[j]>0 & cent_y[j]<0)
thetas[j]<-2*pi+thetas[j]
}
x[[i]]<-x[[i]][order(thetas),]
}
return(x)
}
#' Main entry function to get the prediction regions.
#'
#' @param x Matrix of coordinates - class centers. [x,y,z,p,...]
#' @param bounds Bounds of the plotting frame
#'
#' @return Polygon regions
#'
#' @noRd
#'
predict.regions <- function (x, bounds=graphics::par("usr"))
{
Deets <- getintersects(x=x,bounds=bounds)
Get_segments <- makelistmat(Deets)
TruePoints <- matrix(ncol=2)
for(i in 1:(nrow(x)-1)){
for(j in (i+1):nrow(x)){
TruePoints<-rbind(TruePoints,c(i,j))
}
}
bounds_coors<-matrix(c(bounds[1],bounds[3],
bounds[2],bounds[3],
bounds[2],bounds[4],
bounds[1],bounds[4]),ncol=2,byrow = T)
#need to break early if only have two class means
if(nrow(TruePoints)==2){
PolygonRegion<-list(Get_segments[[1]],Get_segments[[1]])
for(i in 1:4){
idx<-minEucDist(bounds_coors[i,],x)[1]
PolygonRegion[[idx]]<-rbind(PolygonRegion[[idx]],bounds_coors[i,])
}
# order the coordinates such that it is circular
PolygonRegion<-makeCircular(PolygonRegion,x)
return(PolygonRegion)
}
TruePoints<-TruePoints[-1,]
finalsegs<-list()
for(i in 1:nrow(TruePoints)){
finalsegs[[i]]<-TrueBoundary(seg=Get_segments[[i]],original=x,points=TruePoints[i,])
}
PolygonRegion<-list()
#now only need to group together the points to each polygon
for(i in 1:nrow(x)){
index<-c(which(TruePoints[,1]==i),which(TruePoints[,2]==i))
tempmat<-finalsegs[[index[1]]]
if(length(index)==1){
PolygonRegion[[i]]<-tempmat
next
}
for(j in 2:length(index)){
tempmat<-rbind(tempmat,finalsegs[[index[j]]])
}
PolygonRegion[[i]]<-tempmat
}
#next enter corners into relevant matrices
bounds_coors<-matrix(c(bounds[1],bounds[3],
bounds[2],bounds[3],
bounds[2],bounds[4],
bounds[1],bounds[4]),ncol=2,byrow = T)
for(i in 1:4){
idx<-minEucDist(bounds_coors[i,],x)[1]
PolygonRegion[[idx]]<-rbind(PolygonRegion[[idx]],bounds_coors[i,])
}
# order the coordinates such that it is circular
PolygonRegion<-makeCircular(PolygonRegion,x)
return(PolygonRegion)
}
#' Calculates the bounds of the classification regions for 1D plots
#'
#' @param x Vector of coordinates - class centers.
#' @param bounds Bounds of the plotting frame
#'
#' @return Bounds for classification intervals
#'
#' @noRd
predict_regions1D <- function(x, bounds = graphics::par("usr")) {
tmp.Zmeans <- x$Zmeans
tmp.borders <-
(sapply(2:length(tmp.Zmeans), function(x)
(tmp.Zmeans[order(tmp.Zmeans)][x] + tmp.Zmeans[order(tmp.Zmeans)][x - 1])/2))
tmp.vals <- c(min = bounds[1], tmp.borders, max = bounds[2])
tmp.vals <- tmp.vals[order(tmp.vals)]
borders.coords <-
cbind(tmp.vals[1:(length(tmp.vals) - 1), drop = FALSE] , tmp.vals[2:length(tmp.vals), drop =
FALSE])
borders.which <-
sapply(tmp.Zmeans, function(x)
which(x > borders.coords[, 1] & x <= borders.coords[, 2]))
borders.coords <- borders.coords[borders.which, ]
# Returns the classification region borders for each of the classes in the order that the classes are in the bp function
return(borders.coords)
}
# ----------------------------------------------------------------------------------------------
#' Computes the square root of the Manhattan distance
#' An example of a Euclidean embeddable distance metric
#'
#' @param X matrix of samples x variables for computation of samples x samples distance matrix
#'
#' @return a dist object
#' @export
#'
#' @examples
#' sqrtManhattan(iris[,1:4])
sqrtManhattan <- function (X)
{
sqrt(stats::dist(X, method="manhattan"))
}
#' Extended matching coefficient
#'
#' @param X a data frame containing the categorical variables used for computing the EMC distance
#'
#' @return a dist object
#' @export
#'
#' @examples
#' mtdf <- as.data.frame(mtcars)
#' mtdf$cyl <- factor(mtdf$cyl)
#' mtdf$vs <- factor(mtdf$vs)
#' mtdf$am <- factor(mtdf$am)
#' mtdf$gear <- factor(mtdf$gear)
#' mtdf$carb <- factor(mtdf$carb)
#' extended.matching.coefficient(mtdf[,8:11])
#'
extended.matching.coefficient <- function (X)
{
n <- nrow(X)
Dmat <- matrix (0, nrow=n, ncol=n)
for (k in 1:ncol(X))
{
Gk <- indmat(X[,k])
Dmat <- Dmat + matrix (1, nrow=n, ncol=n) - Gk %*% t(Gk)
}
stats::as.dist(sqrt(Dmat))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.