R/main.R

Defines functions clust_canonify bestCriterion buildCriteriaList getCriteriaNames concordance extCriteria intCriteria

Documented in bestCriterion concordance extCriteria getCriteriaNames intCriteria

# ===========================================================================
# File: "main.R"
#                        Created: 2010-04-26 08:23:20
#              Last modification: 2018-07-26 15:18:02
# Author: Bernard Desgraupes
# e-mail: <bernard.desgraupes@u-paris10.fr>
# This is part of the R package 'clusterCrit'.
# ===========================================================================


## 
 # ------------------------------------------------------------------------
 # 
 # "intCriteria" --
 # 
 # Possible values for crit are listed in getCriteriaNames(TRUE).
 # 
 # ------------------------------------------------------------------------
 ##
intCriteria <- function(traj, part, crit)
{
	if (!is.matrix(traj)) {
		stop("argument 'traj' must be a matrix")
	}	
	if (!( is.vector(part) && is.integer(part) )) {
		stop("argument 'part' must be an integer vector")
	}	
	ans <- .Call(cluc_calculateInternalCriteria, traj, clust_canonify(part), buildCriteriaList(crit, TRUE))
    return(ans)
}


## 
 # ------------------------------------------------------------------------
 # 
 # "extCriteria" --
 # 
 # Possible values for crit are listed in getCriteriaNames(FALSE).
 # 
 # ------------------------------------------------------------------------
 ##
extCriteria <- function(part1, part2, crit)
{
	if (!( is.vector(part1) && is.integer(part1) )) {
		stop("argument 'part1' must be an integer vector")
	}	
	if (!( is.vector(part2) && is.integer(part2) )) {
		stop("argument 'part2' must be an integer vector")
	}	
	if (length(part1) != length(part1)) {
		stop("'part1' and 'part2' must be the same length")
	}
	ans <- .Call(cluc_calculateExternalCriteria, clust_canonify(part1), clust_canonify(part2), buildCriteriaList(crit, FALSE))
    return(ans)
}


## 
 # ------------------------------------------------------------------------
 # 
 # "concordance" --
 # 
 # Calculate the table of concordances and discordances (i-e the confusion
 # matrix) between two partitions.
 # 
 # The function returns a 2x2 matrix with the number of pairs belonging or
 # not belonging to the same cluster wrt partition P1 or P2. The vectors P1
 # and P2 must have the same length N. 
 #
 #          | 	1	|	2	|
 #       _____________________
 #        1	|	Nyy	|	Nyn	|
 #        2	|	Nny	|	Nnn	|
 #       _____________________
 # 
 # There are N(N-1)/2 pairs, so Nyy + Nyn + Nny + Nnn = N(N-1)/2.
 # 
 # ------------------------------------------------------------------------
 ##
concordance <- function(part1, part2)
{
	if (!( is.vector(part1) && is.integer(part1) )) {
		stop("argument 'part1' must be an integer vector")
	}	
	if (!( is.vector(part2) && is.integer(part2) )) {
		stop("argument 'part2' must be an integer vector")
	}	
	ans <- .Call(cluc_calculateConcordances, part1, part2)
    return(ans)
}


## 
 # ------------------------------------------------------------------------
 # 
 # "getCriteriaNames" --
 # 
 # The internal criteria list must be kept in synch with the dispatching
 # function cluc_calc_int_criterion() in src/critCalc.f95.
 # 
 # The external criteria list must be kept in synch with the dispatching
 # function cluc_calc_ext_criterion() in src/critCalc.f95.
 # 
 # ------------------------------------------------------------------------
 ##

getCriteriaNames <- function(isInternal) {
	if (isInternal) {
		v <- c(
				"Ball_Hall",
				"Banfeld_Raftery",
				"C_index",
				"Calinski_Harabasz",
				"Davies_Bouldin",
				"Det_Ratio",
				"Dunn",
				"Gamma",
				"G_plus",
				"GDI11","GDI12","GDI13",
				"GDI21","GDI22","GDI23",
				"GDI31","GDI32","GDI33",
				"GDI41","GDI42","GDI43",
				"GDI51","GDI52","GDI53",
				"Ksq_DetW",
				"Log_Det_Ratio",
				"Log_SS_Ratio",
				"McClain_Rao",
				"PBM",
				"Point_Biserial",
				"Ray_Turi",
				"Ratkowsky_Lance",
				"Scott_Symons",
				"SD_Scat",
				"SD_Dis",
				"S_Dbw",
				"Silhouette",
				"Tau",
				"Trace_W",
				"Trace_WiB",
				"Wemmert_Gancarski",
				"Xie_Beni"
			)
	} else {
		v <- c(
				"Czekanowski_Dice",
				"Folkes_Mallows", 
				"Hubert",
				"Jaccard", 
				"Kulczynski", 
				"McNemar",
				"Phi", 
				"Precision", 
				"Rand", 
				"Recall", 
				"Rogers_Tanimoto",
				"Russel_Rao",
				"Sokal_Sneath1",
				"Sokal_Sneath2"
			)
	}
	
	return(v)
}


## 
 # ------------------------------------------------------------------------
 # 
 # "buildCriteriaList" --
 # 
 # ------------------------------------------------------------------------
 ##

buildCriteriaList <- function(crit, isInternal) {
	names <- tolower(getCriteriaNames(isInternal))
	crit <- tolower(crit)
	if (crit[1] == "all") {
		criteria <- names
	} else {
		criteria <- vector(mode="character")
		for (i in 1:length(crit)) {
			if (crit[i] == "gdi") {
				crit[i] <- "gdi11"
			}
			idx <- charmatch(crit[i], names)
			if (is.na(idx)) {
				stop("unknown criterion ",crit[i])
			} else if (idx == 0) {
				stop("ambiguous criterion name ",crit[i])
			} else {
				criteria <- c(criteria, names[idx])
			}
		}
	}
	
	return(criteria)
}



## 
 # ------------------------------------------------------------------------
 # 
 # "bestCriterion" --
 # 
 # Given a vector of clustering index values, return the index of the
 # "best" one in the sense of the specifed criterion.
 # 
 # ------------------------------------------------------------------------
 ##

bestCriterion <- function(x, crit) {
	if (any(is.nan(x))) {
		return(NaN)
	}
	if (any(is.na(x))) {
		return(NA)
	}
	name <- buildCriteriaList(crit, TRUE)[1]
    best <- switch(name,
		"calinski_harabasz" = ,
		"dunn" = ,
		"gdi11" = ,
		"gdi12" = ,
		"gdi13" = ,
		"gdi21" = ,
		"gdi22" = ,
		"gdi23" = ,
		"gdi31" = ,
		"gdi32" = ,
		"gdi33" = ,
		"gdi41" = ,
		"gdi42" = ,
		"gdi43" = ,
		"gdi51" = ,
		"gdi52" = ,
		"gdi53" = ,
		"gamma" = ,
		"pbm" = ,
		"point_biserial" = ,
		"ratkowsky_lance" = ,
		"silhouette" = ,
		"tau" = ,
		"wemmert_gancarski" = which(x==max(x)),
		
		"banfeld_raftery" = ,
		"c_index" = ,
		"davies_bouldin" = ,
		"g_plus" = ,
		"mcclain_rao" = ,
		"ray_turi" = ,
		"scott_symons" = ,
		"sd_scat" = ,
		"sd_dis" = ,
		"s_dbw" = ,
		"xie_beni" = which(x==min(x)),
		
		"ball_hall" = ,
		"ksq_detw" = ,
		"trace_w" = ,
		"trace_wib" = {y <- diff(diff(x))
				which(y==max(y))+1},
		
		"det_ratio" = ,
		"log_det_ratio" = ,
		"log_ss_ratio" = {y <- diff(diff(x))
				which(y==min(y))+1}
	)
				
    return(best[1])
}


## 
 # ------------------------------------------------------------------------
 # 
 # "clust_canonify" --
 # 
 # Make sure the partition vector contains all integer values ranging
 # sequentially from 1.
 # 
 # ------------------------------------------------------------------------
 ##
clust_canonify <- function(part) {
	return(match(part,sort(unique(part))))
}

Try the clusterCrit package in your browser

Any scripts or data that you put into this service are public.

clusterCrit documentation built on Nov. 23, 2023, 5:06 p.m.