R/permutationDistances.r

Defines functions lexicographicPermutationOrderNumber distancePermutationLex distancePermutationCos distancePermutationInsert distancePermutationLee distancePermutationChebyshev distancePermutationManhattan distancePermutationEuclidean distancePermutationHamming distancePermutationPosition2 distancePermutationPosition distancePermutationAdjacency distancePermutationR distancePermutationSwapInv distancePermutationSwap distancePermutationLevenshtein distancePermutationLCStr distancePermutationInterchange

Documented in distancePermutationAdjacency distancePermutationChebyshev distancePermutationCos distancePermutationEuclidean distancePermutationHamming distancePermutationInsert distancePermutationInterchange distancePermutationLCStr distancePermutationLee distancePermutationLevenshtein distancePermutationLex distancePermutationManhattan distancePermutationPosition distancePermutationPosition2 distancePermutationR distancePermutationSwap distancePermutationSwapInv lexicographicPermutationOrderNumber

###################################################################################
#' Interchange Distance for Permutations
#' 
#' The interchange distance is an edit-distance, counting how many edit operation (here: interchanges, i.e., transposition of two arbitrary elements) have to be
#' performed to transform permutation x into permutation y.
#'
#' @param x first permutation (integer vector)
#' @param y second permutation (integer vector)
#'
#' @return numeric distance value \deqn{d(x,y)}, scaled to values between 0 and 1 (based on the maximum possible distance between two permutations)
#'
#' @references Schiavinotto, Tommaso, and Thomas Stuetzle. "A review of metrics on permutations for search landscape analysis." Computers & operations research 34.10 (2007): 3143-3153.
#'
#' @examples
#' x <- 1:5
#' y <- c(1,4,3,2,5)
#' distancePermutationInterchange(x,y)
#' p <- replicate(10,sample(1:5),simplify=FALSE)
#' distanceMatrix(p,distancePermutationInterchange)
#'
#' @export
###################################################################################
distancePermutationInterchange <- function(x, y){
	N<-length(x)
	if(N!=length(y)|!is.numeric(x)|!is.numeric(y)) stop("Incorrect input to distance function, only permutations of same length are allowed.")
	#x<-y[order(x)]
	#result <- .Call("permutationDistanceInterchange", as.integer(x),as.integer(y), PACKAGE="CEGO")
	result <- .Call(C_permutationDistanceInterchange, as.integer(x),as.integer(y))
	(N-result) / (N-1) 
}

###################################################################################
# Longest Common Subsequence Distance for Permutations
# 
# DEPRECATED, see \code{\link{distancePermutationInsert}}. 
#
# @param x first permutation (integer vector)
# @param y second permutation (integer vector)
#
# @return numeric distance value \deqn{d(x,y)}, scaled to values between 0 and 1 (based on the maximum possible distance between two permutations)
#
# @export
# @keywords internal
###################################################################################
#distancePermutationLCSeq<- function(x, y){
#	.Deprecated("distancePermutationInsert")
	#N <- length(x)
	#if(N!=length(y)|!is.numeric(x)|!is.numeric(y)) stop("Incorrect input to distance function, only permutations of same length are allowed.")
	#result <- .Call("permutationDistanceLongestCommonSubsequence", as.integer(x),as.integer(y), PACKAGE="CEGO")
	#(N-result)/(N-1) # N-1 is for PERMUTATIONS only, because two permutations have always at least a common string/sequence of length one. 
                  # this will be different for strings where two strings may have no single character in common. permutations always contain all 
                  # of N characters.
	### 
	## insert is identical but faster
	###
#	distancePermutationInsert(x,y)									
#}

###################################################################################
#' Longest Common Substring Distance for Permutations
#' 
#' Distance of permutations. Based on the longest string of adjacent elements that two permutations have in common.
#'
#' @param x first permutation (integer vector)
#' @param y second permutation (integer vector)
#'
#' #' @return numeric distance value \deqn{d(x,y)}, scaled to values between 0 and 1 (based on the maximum possible distance between two permutations)
#'
#' @references Hirschberg, Daniel S. "A linear space algorithm for computing maximal common subsequences." Communications of the ACM 18.6 (1975): 341-343.
#'
#' @examples
#' x <- 1:5
#' y <- c(5,1,2,3,4)
#' distancePermutationLCStr(x,y)
#' p <- replicate(10,sample(1:5),simplify=FALSE)
#' distanceMatrix(p,distancePermutationLCStr)
#'
#' @export
#' 
###################################################################################
distancePermutationLCStr <- function(x, y){
	N <- length(x)
	if(N!=length(y)|!is.numeric(x)|!is.numeric(y)) stop("Incorrect input to distance function, only permutations of same length are allowed.")
	#result <- .Call("permutationDistanceLongestCommonSubstring",  as.integer(x),as.integer(y), PACKAGE="CEGO") 
	result <- .Call(C_permutationDistanceLongestCommonSubstring,  as.integer(x),as.integer(y)) 
	(N-result)/(N-1)
}

###################################################################################
#' Levenshtein Distance for Permutations
#' 
#' Levenshtein Distance, often just called "Edit Distance". The number of insertions, substitutions or deletions to turn one permutation (or string of equal length) into another.
#'
#' @param x first permutation (integer vector)
#' @param y second permutation (integer vector)
#'
#' @return numeric distance value \deqn{d(x,y)}, scaled to values between 0 and 1 (based on the maximum possible distance between two permutations)
#'
#' @references Levenshtein, Vladimir I. "Binary codes capable of correcting deletions, insertions and reversals." Soviet physics doklady. Vol. 10. 1966.
#'
#' @examples
#' x <- 1:5
#' y <- c(1,2,5,4,3)
#' distancePermutationLevenshtein(x,y)
#' p <- replicate(10,sample(1:5),simplify=FALSE)
#' distanceMatrix(p,distancePermutationLevenshtein)
#'
#' @export
#' 
###################################################################################
distancePermutationLevenshtein <- function(x, y){
	N <- length(x)
	if(N!=length(y)|!is.numeric(x)|!is.numeric(y)) stop("Incorrect input to distance function, only permutations of same length are allowed.")
	.Call(C_permutationDistanceLevenshtein,  as.integer(x),as.integer(y)) / N
}

###################################################################################
#' Swap-Distance for Permutations
#' 
#' The swap distance is an edit-distance, counting how many edit operation (here: swaps, i.e., transposition of two adjacent elements) have to be
#' performed to transform permutation x into permutation y.
#' Note: In v2.4.0 of CEGO and earlier, this function actually computed the swap distance on the inverted permutations 
#' (i.e., on the rankings, rather than orderin).
#' This is now (v2.4.2 and later) corrected by inverting the permutations x and y before computing the distance (ie. computing ordering first).
#' The original behavior can be reproduced by \code{\link{distancePermutationSwapInv}}.
#' This issue was kindly reported by Manuel Lopez-Ibanez and the difference in terms of behavior is discussed by Ekhine Irurozki and him (2021).
#'
#' @param x first permutation (integer vector)
#' @param y second permutation (integer vector)
#'
#' @return numeric distance value \deqn{d(x,y)}, scaled to values between 0 and 1 (based on the maximum possible distance between two permutations)
#'
#' @references Schiavinotto, Tommaso, and Thomas Stuetzle. "A review of metrics on permutations for search landscape analysis." Computers & operations research 34.10 (2007): 3143-3153.
#' @references Irurozki, Ekhine and Ibanez-Lopez Unbalanced Mallows Models for Optimizing Expensive Black-Box Permutation Problems. In Proceedings of the Genetic and Evolutionary Computation Conference, GECCO 2021. ACM Press, New York, NY, 2021. doi: 10.1145/3449639.3459366 
#'
#' @examples
#' x <- 1:5
#' y <- c(1,2,3,5,4)
#' distancePermutationSwap(x,y)
#' p <- replicate(10,sample(1:5),simplify=FALSE)
#' distanceMatrix(p,distancePermutationSwap)
#'
#' @export
#' 
###################################################################################
distancePermutationSwap <- function(x, y){
	N <- length(x)
	#x <- order(x)
	#y <- order(y)
	if(N!=length(y)|!is.numeric(x)|!is.numeric(y)) stop("Incorrect input to distance function, only permutations of same length are allowed.")
	result <- .Call(C_permutationDistanceSwap, as.integer(x),as.integer(y))
	2*result / (N^2 - N)
}


###################################################################################
#' Inverse-Swap-Distance for Permutations
#' 
#' The swap distance on the inverse of permutations x and y.
#' See \code{\link{distancePermutationSwap}} for non-inversed version.
#'
#' @param x first permutation (integer vector)
#' @param y second permutation (integer vector)
#'
#' @return numeric distance value \deqn{d(x,y)}, scaled to values between 0 and 1 (based on the maximum possible distance between two permutations)
#'
#' @examples
#' x <- 1:5
#' y <- c(1,2,3,5,4)
#' distancePermutationSwapInv(x,y)
#' p <- replicate(10,sample(1:5),simplify=FALSE)
#' distanceMatrix(p,distancePermutationSwapInv)
#'
#' @export
###################################################################################
distancePermutationSwapInv <- function(x, y){
	N <- length(x)
	if(N!=length(y)|!is.numeric(x)|!is.numeric(y)) stop("Incorrect input to distance function, only permutations of same length are allowed.")
	result <- .Call(C_permutationDistanceSwapInv, as.integer(x),as.integer(y))
	2*result / (N^2 - N)
}

###################################################################################
#' R-Distance for Permutations
#' 
#' R distance or unidirectional adjacency distance. Based on count of number of times that a two element sequence in x also occurs in y, in the same order.
#'
#' @param x first permutation (integer vector)
#' @param y second permutation (integer vector)
#'
#' @return numeric distance value \deqn{d(x,y)}, scaled to values between 0 and 1 (based on the maximum possible distance between two permutations)
#'
#' @references Sevaux, Marc, and Kenneth Soerensen. "Permutation distance measures for memetic algorithms with population management." Proceedings of 6th Metaheuristics International Conference (MIC'05). 2005.
#' @references Reeves, Colin R. "Landscapes, operators and heuristic search." Annals of Operations Research 86 (1999): 473-490.
#'
#' @examples
#' x <- 1:5
#' y <- c(1,2,3,5,4)
#' distancePermutationR(x,y)
#' p <- replicate(10,sample(1:5),simplify=FALSE)
#' distanceMatrix(p,distancePermutationR)
#'
#' @export
#' 
###################################################################################
distancePermutationR <- function(x, y){
	N<-length(x)
	if(N!=length(y)|!is.numeric(x)|!is.numeric(y)) stop("Incorrect input to distance function, only permutations of same length are allowed.")
	#result <- .Call("permutationDistanceR", as.integer(x), as.integer(y), PACKAGE="CEGO")
	result <- .Call(C_permutationDistanceR, as.integer(x), as.integer(y))
	result / (N - 1)
}

###################################################################################
#' Adjacency Distance for Permutations
#' 
#' Bi-directional adjacency distance for permutations, depending on how often two elements are neighbours in both permutations x and y.
#'
#' @param x first permutation (integer vector)
#' @param y second permutation (integer vector)
#'
#' @return numeric distance value \deqn{d(x,y)}, scaled to values between 0 and 1 (based on the maximum possible distance between two permutations)
#'
#' @references Sevaux, Marc, and Kenneth Soerensen. "Permutation distance measures for memetic algorithms with population management." Proceedings of 6th Metaheuristics International Conference (MIC'05). 2005.
#' @references Reeves, Colin R. "Landscapes, operators and heuristic search." Annals of Operations Research 86 (1999): 473-490.
#'
#' @examples
#' x <- 1:5
#' y <- 5:1
#' distancePermutationAdjacency(x,y)
#' p <- replicate(10,sample(1:5),simplify=FALSE)
#' distanceMatrix(p,distancePermutationAdjacency)
#'
#' @export
#' 
###################################################################################
distancePermutationAdjacency <- function(x, y){
	N<-length(x)
	if(N!=length(y)|!is.numeric(x)|!is.numeric(y)) stop("Incorrect input to distance function, only permutations of same length are allowed.")
	#result <- .Call("permutationDistanceAdjacency", as.integer(x), as.integer(y), PACKAGE="CEGO")
	result <- .Call(C_permutationDistanceAdjacency, as.integer(x), as.integer(y))
	(N-result-1) / (N - 1)
}

###################################################################################
#' Position Distance for Permutations
#' 
#' Position distance (or Spearmans Correlation Coefficient), scaled to values between 0 and 1. 
#'
#' @param x first permutation (integer vector)
#' @param y second permutation (integer vector)
#'
#' @return numeric distance value \deqn{d(x,y)}, scaled to values between 0 and 1 (based on the maximum possible distance between two permutations)
#'
#' @references Schiavinotto, Tommaso, and Thomas Stuetzle. "A review of metrics on permutations for search landscape analysis." Computers & operations research 34.10 (2007): 3143-3153.
#' @references Reeves, Colin R. "Landscapes, operators and heuristic search." Annals of Operations Research 86 (1999): 473-490.
#'
#' @examples
#' x <- 1:5
#' y <- c(1,3,5,4,2)
#' distancePermutationPosition(x,y)
#' p <- replicate(10,sample(1:5),simplify=FALSE)
#' distanceMatrix(p,distancePermutationPosition)
#'
#' @export
#' 
###################################################################################
distancePermutationPosition <- function(x, y){
	N<-length(x)
	if(N!=length(y)|!is.numeric(x)|!is.numeric(y)) stop("Incorrect input to distance function, only permutations of same length are allowed.")
	#dis <- .Call("permutationDistancePosition", as.integer(x), as.integer(y), PACKAGE="CEGO")
	dis <- .Call(C_permutationDistancePosition, as.integer(x), as.integer(y))
	if(N%%2) #scale to [0;1] in case of odd N
		dis <- dis / ((N^2-1)/ 2)
	else #scale to [0;1] in case of even N
		dis <- dis / (N^2 / 2)
	dis	
}

###################################################################################
#' Squared Position Distance for Permutations
#' 
#' Squared position distance (or Spearmans Footrule), scaled to values between 0 and 1. 
#'
#' @param x first permutation (integer vector)
#' @param y second permutation (integer vector)
#'
#' @return numeric distance value \deqn{d(x,y)}, scaled to values between 0 and 1 (based on the maximum possible distance between two permutations)
#'
#' @references Schiavinotto, Tommaso, and Thomas Stuetzle. "A review of metrics on permutations for search landscape analysis." Computers & operations research 34.10 (2007): 3143-3153.
#' @references Reeves, Colin R. "Landscapes, operators and heuristic search." Annals of Operations Research 86 (1999): 473-490.
#'
#' @examples
#' x <- 1:5
#' y <- c(1,3,5,4,2)
#' distancePermutationPosition2(x,y)
#' p <- replicate(10,sample(1:5),simplify=FALSE)
#' distanceMatrix(p,distancePermutationPosition2)
#'
#' @export
#' 
###################################################################################
distancePermutationPosition2 <- function(x, y){
	N<-length(x)
	if(N!=length(y)|!is.numeric(x)|!is.numeric(y)) stop("Incorrect input to distance function, only permutations of same length are allowed.")
	#result <- .Call("permutationDistancePosition2", as.integer(x), as.integer(y), PACKAGE="CEGO")
	result <- .Call(C_permutationDistancePosition2, as.integer(x), as.integer(y))
	result / ((N^3-N)/3) 
}

###################################################################################
#' Hamming Distance for Permutations
#' 
#' Hamming distance for permutations, scaled to values between 0 and 1.
#' That is, the number of unequal elements of two permutations, divided by the permutations length.
#'
#' @param x first permutation (integer vector)
#' @param y second permutation (integer vector)
#'
#' @return numeric distance value \deqn{d(x,y)}, scaled to values between 0 and 1 (based on the maximum possible distance between two permutations)
#'
#' @examples
#' x <- 1:5
#' y <- c(5,1,2,3,4)
#' distancePermutationHamming(x,y)
#' p <- replicate(10,sample(1:5),simplify=FALSE)
#' distanceMatrix(p,distancePermutationHamming)
#'
#' @export
###################################################################################
distancePermutationHamming <- function(x, y){
	N <- length(x)
	if(N!=length(y)|!is.numeric(x)|!is.numeric(y)) stop("Incorrect input to distance function, only permutations of same length are allowed.")
	sum(x != y)/N
}
#distancePermutationHamming <- function(x, y){
#	N <- length(x)
#	if(N!=length(y)|!is.numeric(x)|!is.numeric(y)) stop("Incorrect input to distance function, only permutations of same length are allowed.")
#	.Call("permutationDistanceHamming", as.integer(x), as.integer(y),PACKAGE="CEGO") / N #this works,but is actually slower than a pure R implementation
#}

###################################################################################
#' Euclidean Distance for Permutations
#' 
#' Euclidean distance for permutations, scaled to values between 0 and 1:
#' \deqn{d(x,y) = \frac{1}{r} \sqrt(\sum_{i=1}^n (x_i - y_i)^2) }{ d(x,y) = 1/r * sqrt(\sum_{i=1}^n  (x_i - y_i)^2)}
#' where n is the length of the permutations x and y, and scaling factor \eqn{r=sqrt(2*4*c*(c+1)*(2*c+1)/6)} with \eqn{c=(n-1)/2} (if n is odd)
#' or \eqn{r=sqrt(2*c*(2*c-1)*(2*c+1)/3)} with \eqn{c=n/2}  (if n is even).
#'
#' @param x first permutation (integer vector)
#' @param y second permutation (integer vector)
#'
#' @return numeric distance value \deqn{d(x,y)}, scaled to values between 0 and 1 (based on the maximum possible distance between two permutations)
#'
#' @examples
#' x <- 1:5
#' y <- c(5,1,2,3,4)
#' distancePermutationEuclidean(x,y)
#' p <- replicate(10,sample(1:5),simplify=FALSE)
#' distanceMatrix(p,distancePermutationEuclidean)
#'
#' @export
###################################################################################
distancePermutationEuclidean <- function(x, y){
	N<-length(x)
	if(N!=length(y)|!is.numeric(x)|!is.numeric(y)) stop("Incorrect input to distance function, only permutations of same length are allowed.")
	#result <- .Call("permutationDistanceEuclidean", as.integer(x), as.integer(y), PACKAGE="CEGO") #this works,but is actually slower than a pure R implementation, as below. at least for permutation length 30
	#mdis <- sqrt(result)
	mdis <- sqrt(sum((x-y)^2))
	if(N%%2){ #scale to [0;1] in case of odd N
		n=(N-1)/2
		dis <- mdis / sqrt(8*n*(n+1)*(2*n+1)/6)
	}else{ #scale to [0;1] in case of even N
		n=N/2
		dis <- mdis / sqrt(2*n*(4*n^2-1)/3)
	}	
	dis
}


###################################################################################
#' Manhattan Distance for Permutations
#' 
#' Manhattan distance for permutations, scaled to values between 0 and 1:
#' \deqn{d(x,y) = \frac{1}{r} \sum_{i=1}^n |x_i - y_i| }{ d(x,y) = 1/r * \sum_{i=1}^n  |x_i - y_i|}
#' where n is the length of the permutations x and y, and scaling factor \eqn{r=(n^2-1)/2} (if n is odd)
#' or \eqn{r=((n^2)/2} (if n is even).
#'
#' @param x first permutation (integer vector)
#' @param y second permutation (integer vector)
#'
#' @return numeric distance value \deqn{d(x,y)}, scaled to values between 0 and 1 (based on the maximum possible distance between two permutations)
#'
#' @examples
#' x <- 1:5
#' y <- c(5,1,2,3,4)
#' distancePermutationManhattan(x,y)
#' p <- replicate(10,sample(1:5),simplify=FALSE)
#' distanceMatrix(p,distancePermutationManhattan)
#'
#' @export
###################################################################################
distancePermutationManhattan <- function(x, y){
	N <- length(x)
	if(N!=length(y)|!is.numeric(x)|!is.numeric(y)) stop("Incorrect input to distance function, only permutations of same length are allowed.")
	mdis<-sum(abs(x-y))
	if(N%%2) #scale to [0;1] in case of odd N
		dis <- mdis / ((N^2-1)/ 2)
	else #scale to [0;1] in case of even N
		dis <- mdis / (N^2 / 2)
	dis
}

###################################################################################
#' Chebyshev Distance for Permutations
#' 
#' Chebyshev distance for permutations. Specific to permutations is only the scaling to values of 0 to 1:
#' \deqn{d(x,y) = \frac{max(|x - y|) }{ (n-1) } }{d(x,y) = max(|x - y|) / (n-1)}
#' where n is the length of the permutations x and y.
#'
#' @param x first permutation (integer vector)
#' @param y second permutation (integer vector)
#'
#' @return numeric distance value \deqn{d(x,y)}, scaled to values between 0 and 1 (based on the maximum possible distance between two permutations)
#'
#' @examples
#' x <- 1:5
#' y <- c(5,1,2,3,4)
#' distancePermutationChebyshev(x,y)
#' p <- replicate(10,sample(1:5),simplify=FALSE)
#' distanceMatrix(p,distancePermutationChebyshev)
#'
#' @export
###################################################################################
distancePermutationChebyshev <- function(x, y){
	N <- length(x)
	if(N!=length(y)|!is.numeric(x)|!is.numeric(y)) stop("Incorrect input to distance function, only permutations of same length are allowed.")
	max(abs(x-y)) / (N-1)
}

###################################################################################
#' Lee Distance for Permutations
#' 
#' Usually a string distance, with slightly different definition.
#' Adapted to permutations as: 
#' \deqn{d(x,y) = \sum_{i=1}^n min(|x_i - y_i|), n- |x_i - y_i|)}{ d(x,y) = \sum_{i=1}^n  min(|x_i - y_i|), n- |x_i - y_i|)}
#' where n is the length of the permutations x and y.
#'
#' @param x first permutation (integer vector)
#' @param y second permutation (integer vector)
#'
#' @return numeric distance value \deqn{d(x,y)}, scaled to values between 0 and 1 (based on the maximum possible distance between two permutations)
#'
#' @references Lee, C., "Some properties of nonbinary error-correcting codes," Information Theory, IRE Transactions on, vol.4, no.2, pp.77,82, June 1958
#'
#' @examples
#' x <- 1:5
#' y <- c(5,1,2,3,4)
#' distancePermutationLee(x,y)
#' p <- replicate(10,sample(1:5),simplify=FALSE)
#' distanceMatrix(p,distancePermutationLee)
#'
#' @export
#' 
###################################################################################
distancePermutationLee <- function(x,y){
	N <- length(x)
	if(N!=length(y)|!is.numeric(x)|!is.numeric(y)) stop("Incorrect input to distance function, only permutations of same length are allowed.")
	#mdis <- .Call("permutationDistanceLee", as.integer(x), as.integer(y), PACKAGE="CEGO") #this works,but is actually slower than a pure R implementation, as below. at least for permutation length 30
	mdis <- .Call(C_permutationDistanceLee, as.integer(x), as.integer(y)) #this works,but is actually slower than a pure R implementation, as below. at least for permutation length 30
	#mdis <- sum(pmin(abs(x-y),N-abs(x-y)))
	if(N%%2) #scale to [0;1] in case of odd N
		dis <- mdis / ((N^2-1)/ 2)
	else #scale to [0;1] in case of even N
		dis <- mdis / (N^2 / 2)
	dis
}

###################################################################################
#' Insert Distance for Permutations
#' 
#' The Insert Distance is an edit distance. It counts the minimum number of delete/insert operations
#' required to transform one permutation into another. A delete/insert operation shifts one element to a new position.
#' All other elements move accordingly to make place for the element. E.g., the following shows a single delete/insert move that
#' sorts the corresponding permutation: 1 4 2 3 5 -> 1 2 3 4 5. 
#'
#' @param x first permutation (integer vector)
#' @param y second permutation (integer vector)
#'
#' @return numeric distance value \deqn{d(x,y)}, scaled to values between 0 and 1 (based on the maximum possible distance between two permutations)
#'
#' @references Schiavinotto, Tommaso, and Thomas Stuetzle. "A review of metrics on permutations for search landscape analysis." Computers & operations research 34.10 (2007): 3143-3153.
#' @references Wikipedia contributors, "Longest increasing subsequence", Wikipedia, The Free Encyclopedia, 12 November 2014, 19:38 UTC, [accessed 13 November 2014] 
#'
#' @examples
#' x <- 1:5
#' y <- c(5,1,2,3,4)
#' distancePermutationInsert(x,y)
#' p <- replicate(10,sample(1:5),simplify=FALSE)
#' distanceMatrix(p,distancePermutationInsert)
#'
#' @export
#' 
###################################################################################
distancePermutationInsert <- function(x, y){
	N<-length(x) 
	if(N!=length(y)|!is.numeric(x)|!is.numeric(y)) stop("Incorrect input to distance function, only permutations of same length are allowed.")
	#x<-order(y)[x] #Important note: this line means "p1 * p2^{-1}". Schiavinotto say it should be  "p2^{-1} * p1" . That seems to be a typo/error. (of course it does not matter which permutation is is inverted. but the inverted permutation should be at the right.). Is now calculated in c code
	#result <- .Call("permutationDistanceInsert", as.integer(x), as.integer(y), PACKAGE="CEGO")
	result <- .Call(C_permutationDistanceInsert, as.integer(x), as.integer(y))
	(N-result)/(N-1)
}

###################################################################################
#' Cosine Distance for Permutations
#' 
#' The Cosine distance for permutations is derived from the Cosine similarity measure
#' which has been applied in fields like text mining.
#' It is based on the scalar product of two vectors (here: permutations).
#'
#' @param x first permutation (integer vector)
#' @param y second permutation (integer vector)
#'
#' @return numeric distance value \deqn{d(x,y)}, scaled to values between 0 and 1 (based on the maximum possible distance between two permutations)
#'
#' @references Singhal, Amit (2001)."Modern Information Retrieval: A Brief Overview". Bulletin of the IEEE Computer Society Technical Committee on Data Engineering 24 (4): 35-43
#'
#' @examples
#' x <- 1:5
#' y <- c(5,1,2,3,4)
#' distancePermutationCos(x,y)
#' p <- replicate(10,sample(1:5),simplify=FALSE)
#' distanceMatrix(p,distancePermutationCos)
#'
#' @export
###################################################################################
distancePermutationCos <- function(x,y){
	N=length(x)
	if(N!=length(y)|!is.numeric(x)|!is.numeric(y)) stop("Incorrect input to distance function, only permutations of same length are allowed.")
	as.numeric(1-((x%*%y)-(N*(N+1)*(N+2)/6))/((N^3-N)/6)) # |a| = |b| because permutation -> |a| * |a| = sum_{i=1}^n i^2 = (N*(N+1)*(2*N+1)/6)
}#the minus factor is tetrahedral pyramid




###################################################################################
#' Lexicographic permutation distance
#' 
#' This function calculates the lexicographic permutation distance. That is the difference of positions
#' that both positions would receive in a lexicographic ordering. Note, that this distance
#' measure can quickly become inaccurate if the length of the permutations grows too large, due
#' to being based on the factorial of the length. In general, permutations longer than 100 elements should
#' be avoided.
#'
#' @param x first permutation (integer vector)
#' @param y second permutation (integer vector)
#'
#' @return numeric distance value \deqn{d(x,y)}, scaled to values between 0 and 1 (based on the maximum possible distance between two permutations)
#'
#' @seealso \code{\link{lexicographicPermutationOrderNumber}}
#'
#' @examples
#' x <- 1:5
#' y <- c(1,2,3,5,4)
#' distancePermutationLex(x,y)
#' p <- replicate(10,sample(1:5),simplify=FALSE)
#' distanceMatrix(p,distancePermutationLex)
#'
#' @export
###################################################################################
distancePermutationLex <- function(x,y){
	N <- length(x)
	if(N!=length(y)|!is.numeric(x)|!is.numeric(y)) stop("Incorrect input to distance function, only permutations of same length are allowed.")
	abs(lexicographicPermutationOrderNumber(x) - lexicographicPermutationOrderNumber(y)) / (factorial(N)-1)
}

###################################################################################
#' Lexicographic order number
#' 
#' This function returns the position-number that a permutation would receive in a lexicographic ordering.
#' It is used in the lexicographic distance measure.
#'
#' @param x permutation (integer vector)
#'
#' @return numeric value giving position in lexicographic order.
#'
#' @seealso \code{\link{distancePermutationLex}}
#'
#' @examples
#' lexicographicPermutationOrderNumber(1:5)
#' lexicographicPermutationOrderNumber(c(1,2,3,5,4))
#' lexicographicPermutationOrderNumber(c(1,2,4,3,5))
#' lexicographicPermutationOrderNumber(c(1,2,4,5,3))
#' lexicographicPermutationOrderNumber(c(1,2,5,3,4))
#' lexicographicPermutationOrderNumber(c(1,2,5,4,3))
#' lexicographicPermutationOrderNumber(c(1,3,2,4,5))
#' lexicographicPermutationOrderNumber(5:1)
#' lexicographicPermutationOrderNumber(1:7)
#' lexicographicPermutationOrderNumber(7:1)
#'
#' @export
#' 
###################################################################################
lexicographicPermutationOrderNumber <- function(x){
	N <- length(x)
	if(!is.numeric(x)) stop("Incorrect input to lexicographicPermutationOrderNumber, should be permutation (numeric vector).")
	#result <- .Call("lexPermOrder", as.integer(x), PACKAGE="CEGO")
	result <- .Call(C_lexPermOrder, as.integer(x))
	sum(result*factorial((N-1):0))+1
}

Try the CEGO package in your browser

Any scripts or data that you put into this service are public.

CEGO documentation built on May 29, 2024, 3:35 a.m.