R/pdfCluster.R
In pdfCluster: Cluster Analysis via Nonparametric Density Estimation

Documented in dbs.cluster dbs.pdfCluster groups h.norm hprop2f plot.dbs plot.kepdf plot.kepdf1d plot.pdfCluster summary.dbs summary.pdfCluster

#package
#pdfCluster +pdfClusterPairs

#dyn.load("C:/Documents and Settings/menardi/Documenti/assegnoAA/pacchettoNew/pdfClusterNew/src/macon.dll")
#dyn.load("C:/Documents and Settings/menardi/Documenti/assegnoAA/pacchettoNew/pdfClusterNew/src/kepdf.dll")
#dyn.load("C:/Documents and Settings/menardi/Documenti/assegnoAA/pacchettoNew/pdfClusterNew/src/nc.dll")
#####
# R code for kernel density estimation
# R interfaced with C
		  
kepdf <- function (x, eval.points = x, kernel = "gaussian", bwtype = "fixed", h = h.norm(x), hx = NULL, alpha = 1/2) 
{
    bwtype <- match.arg(bwtype, choices=c("fixed", "adaptive"))
	if (!bwtype%in% c("fixed", "adaptive")) stop("bwtype should be one of 'fixed' or 'adaptive'")
    kernel <- match.arg(kernel, choices=c("gaussian", "t7"))
    if (!kernel%in% c("gaussian", "t7")) stop("kernel should be one of 'gaussian', 't7'")
    x <- data.matrix(x)
    eval.points <- data.matrix(eval.points)
    nx <- as.integer(nrow(x))
    ndim <- as.integer(ncol(x))
    #x <- matrix(as.double(x), nx, ndim)
    neval <- as.integer(nrow(eval.points))
    eval.points <- matrix(as.double(eval.points), neval, ndim)
    f <- as.double(rep(0, neval))
    if (bwtype == "fixed"){ 
        hm <- matrix(as.double(h), nx, ndim, byrow = TRUE)
        par <- list(h = h, hx = hx )
        }
    else if (bwtype == "adaptive") {
           hm <- hx
           if (is.null(hx)|| any(dim(hx)!=c(nx, ndim)))   
           hx <- hm <- matrix(as.double(hprop2f(x = x, h = h, alpha, kernel = kernel)), nx, ndim)
           par <- list(h = h, hx = hx, alpha= alpha )
           }
    rec.hm <- 1/(hm)
    prodhm <-apply(rec.hm, 1, prod) 
    pdf <- switch(kernel,  
        gaussian = .C("c_kepdfN", as.double(x), as.double(rec.hm/sqrt(2)), as.double(eval.points), 
              as.integer(nx), as.integer(ndim), as.integer(neval), 
              double(neval), as.double(prodhm), PACKAGE="pdfCluster"),
        t7 = .C("c_kepdft7", as.double(x), as.double(rec.hm), as.double(eval.points), 
              as.integer(nx), as.integer(ndim), as.integer(neval), 
              double(neval), as.double(prodhm), PACKAGE="pdfCluster")
	)
	new("kepdf", call = match.call(), x = x, eval.points = eval.points, estimate = pdf[[7]], kernel= kernel, bwtype = bwtype, par = par)
	}	

		
 
setClass("kepdf", representation(call="call", x="matrix", eval.points="matrix", 
estimate="numeric", kernel="character", bwtype="character", par="list"))

#function to determine bandwiths

# normal reference rule
h.norm <- function(x){
   x <- as.matrix(x)
   nd <- ncol(x)
   sd <- sqrt(diag(var(x)))
   sd*(4/((nd+2)*nrow(x)))^(1/(nd+4))
 }
 
#smoothing matrix of dimension n x d
		
 hprop2f <- function(x, h=h.norm(x), alpha=1/2, kernel="gaussian" ){
    x<-data.matrix(x)
    nx <- as.integer(nrow(x))
    ndim <- as.integer(ncol(x))
    hm <- matrix(as.double(h), nx, ndim, byrow = TRUE)
    rec.hm <- 1/(hm)
    prodhm <-apply(rec.hm, 1, prod) 
    pdfpil <- switch(kernel,  
        gaussian = .C("c_kepdfN", as.double(x), as.double(rec.hm/sqrt(2)), as.double(x), 
              as.integer(nx), as.integer(ndim), as.integer(nx), 
              double(nx), as.double(prodhm), PACKAGE="pdfCluster"),
        t7= .C("c_kepdft7", as.double(x), as.double(rec.hm), as.double(x), 
              as.integer(nx), as.integer(ndim), as.integer(nx), 
              double(nx), as.double(prodhm), PACKAGE="pdfCluster") 			  
        )
        #cons <- switch (kernel, gaussian = 1/((2 * pi)^(ndim/2) * nx), biweight = (15/16)/nx, triweight = (35/32)/nx)
        #g <- exp(mean(log(pdfpil[[7]]*cons)))
    #lambda <- ((pdfpil[[7]]*cons)/g)^(-alpha)
    g <- exp(mean(log(pdfpil[[7]])))
    lambda <- ((pdfpil[[7]])/g)^(-alpha)
    hm <- outer(lambda, h)
    hm <- matrix(as.double(hm), nx, ndim)
    hm
 }
		

summary.kepdf <- function (object, ..., props=c(75,50,25)) {
	#compute
	if(dim(object@eval.points)[1]==dim(object@x)[1] && all(object@x==object@eval.points)) pdf.obs <- object@estimate else pdf.obs <- kepdf(object@x, bwtype=object@bwtype, kernel=object$kernel, h=object@par$h, hx = object@par$hx, eval.points=object@x)@estimate 
		hts <- quantile(pdf.obs, prob = (100 - props)/100)
	cat("An S4 object of class \"", class(object),"\"","\n","\n",sep="")
	modepos = which.max(pdf.obs)				
	cat("The highest density data point has position", modepos, "in the sample data", "\n","\n",sep=" ")
	indices <- list()
	for(i in 1:length(props)){
		indices[[i]] <- (which(pdf.obs>hts[i]))
		cat("Rows of ", props[i], "% top density data points:", indices[[i]],"\n","\n",sep=" ")
	}				
	#names(indices) <- paste(as.character(props),"%",sep="")
	#new("summary.kepdf", obj.class=class(object)[1], mode = modepos, props = props, indices = indices)
    #show 
    invisible(object)
    }

#setClass("summary.kepdf", representation(obj.class = "character", mode = "numeric", props = "numeric", indices = "list"))

setGeneric("summary", function(object, ...)standardGeneric("summary"))
setMethod("summary", "kepdf", summary.kepdf)


#show methods
setMethod("show","kepdf", function(object){
	cat("An S4 object of class \"",class(object),"\"","\n","\n",sep="")
	cat("Call: ")
	print(object@call)
	cat("\n")
	cat("Kernel: ", "\n")
	print(object@kernel)
	cat("\n")
	if(object@bwtype == "fixed") {
		cat("Estimator type: fixed bandwidth", "\n")
		cat("\n")
		if(ncol(object@x)==1) 
		cat("Smoothing parameter: ", object@par$h, "\n") else 
		cat("Diagonal elements of the smoothing matrix: ", object@par$h, "\n")
		} else {
		cat("Estimator type: adaptive bandwidth", "\n")
		cat("\n")
		cat("Smoothing parameters of observations: ", "\n")
		print(object@par$hx)	
		}
	cat("\n")
	cat("Density estimate at evaluation points: ", "\n")
	print(object@estimate)
	cat("\n")
	})



plot.kepdf1d <- function(x, eval.points=NULL, n.grid=NULL, data=NULL, add=FALSE,
                         main	= NULL, xlab=NULL, ylab=NULL, col=NULL, col.data=2, type="l",...){
  ##########################################
  ##########################################
  # comments by GM 03/02/2011
  # plot of univariate kernel density estimate 
  # DEFAULT: density is evaluated on a grid defined on the range 
  # of evaluation points of the density
  ##########################################
  ##########################################
  if(is.null(n.grid)) n.grid <- 30
  if(is.null(main))	main="Kernel density estimate of data"
  if(is.null(xlab))	xlab="eval.points"
  if(is.null(ylab))	ylab="density function"
  if(is.null(eval.points)) {
    datarange <- apply(x@eval.points, 2, extendrange)
    eval.points <- seq(datarange[1, 1], datarange[2, 1], length=n.grid)
  } 
  pdf <- kepdf(x@x, eval.points=eval.points, kernel = x@kernel, 
               bwtype = x@bwtype, h=x@par$h, hx=x@par$hx)
  pdf.est.sort <- pdf@estimate[order(eval.points)]
  eval.points.sort=sort(eval.points)
  if(is.null(col)) col=1
  if (add){lines(eval.points.sort, pdf.est.sort, col=col,...)} else {
    plot(eval.points.sort, pdf.est.sort, type=type, main=main, xlab=xlab, 
         ylab=ylab, col=col, ...)}
  if (!is.null(data)) rug(data, col=col.data)
  invisible(list(eval.points=as.vector(eval.points), estimate=pdf@estimate))
}

plot.kepdf2d <- function (x, eval.points=NULL, n.grid=NULL, data=NULL,
                          add=FALSE, main=NULL, xlab=NULL, ylab=NULL, zlab=NULL, col=NULL, 
                          col.data=2, props=c(75,50,25), method="contour", ticktype = "detailed",
                          ...) {
  if(is.null(n.grid)) n.grid <-c(30,30)
  if(is.null(main)) main="Kernel density estimate of data"
  if(is.null(eval.points)) {  
    datarange <- apply(x@eval.points, 2, extendrange, f= 0.15)
    xgrid <- seq(datarange[1, 1], datarange[2, 1], length=n.grid[1]) 
    # sort(x@eval.points[,1])
    ygrid <- seq(datarange[1, 2], datarange[2, 2], length=n.grid[2]) 
    # sort(x@eval.points[,2])
  } else { 
    xgrid=sort(eval.points[, 1])
    ygrid=sort(eval.points[, 2])
  }
  method <- match.arg(method,choices=c("contour","perspective","image"))
  nx <- length(xgrid)
  ny <- length(ygrid)
  grid.points <- as.matrix(expand.grid(xgrid,ygrid))
  pdf <- kepdf(x@x, eval.points=grid.points, kernel = x@kernel, 
               bwtype=x@bwtype, h=x@par$h, hx=x@par$hx)
  lab <- colnames(x@x) 
  if(is.null(xlab)) if(is.null(lab)) xlab <- "Var 1" else xlab <- lab[1]
  if(is.null(ylab)) if(is.null(lab)) ylab <- "Var 2" else ylab <- lab[2]
  if(method=="image"){
    if(is.null(col)) col=heat.colors(12)		
    image(xgrid, ygrid, matrix(pdf@estimate, nx,ny), add = add, main=main,
          col=col, xlab=xlab, ylab=ylab, ...)
  } else if(method=="perspective"){
    if (is.null(zlab)) zlab="density function"
    if(is.null(col)) col="lightblue"		
    persp(xgrid,ygrid, matrix(pdf@estimate, nx,ny), main=main, xlab = xlab,
          ylab = ylab, zlab=zlab, col=col, ticktype = "detailed", ...)
  } else {
    if(!add) plot(grid.points, type = "n", main=main, xlab = xlab, 
                  ylab = ylab, ...)
    if(dim(x@eval.points)[1]==dim(x@x)[1] && all(x@x==x@eval.points)) pdf.obs <- x@estimate else pdf.obs <- kepdf(x@x, eval.points=x@x, kernel = x@kernel, bwtype=x@bwtype, h=x@par$h, hx=x@par$hx)@estimate
    hts <- quantile(pdf.obs, prob = (100 - props)/100)
    if(is.null(col)) col=1		
    for (i in 1:length(props)) {
      scale <- props[i]/hts[i]
      contour(xgrid, ygrid, matrix(pdf@estimate, nx,ny) * scale, 
              level = hts[i] *scale, add = TRUE, main=main, col=col, ...)
    }
    if(!is.null(data)) points(data, col=col.data, ...)#aggiunge i punti	
  }
  invisible(list(eval.points = pdf@eval.points, estimate = pdf@estimate))
}		


plot.kepdfdd <- function (x, eval.points= NULL, n.grid=NULL, data=NULL,
                          main=NULL, method="contour", indcol=NULL,
                          text.diag.panel = NULL, gap = 0.5, col=NULL, col.data=2, ...) {
  textPanel <- function(x = 0.5, y = 0.5, txt, cex, font) 
      text(x, y, txt, cex = cex, font = font)
  localAxis <- function(side, x, y, xpd, bg, main,  oma, ...) {
      if (side%%2 == 1) Axis(x, side = side, xpd = NA, ...) else 
      Axis(y, side = side, xpd = NA, ...)
      }
  localPlot <- function(..., oma, font.main, cex.main) plot.kepdf2d(...)
  dots <- list(...)
  nmdots <- names(dots)
  if (!is(x, "kepdf")) stop("argument must be of class \"kepdf\"")
  nc <- ncol(x@x)
  if (is.null(text.diag.panel)) if(!is.null(lab <- colnames(x@x))) text.diag.panel <- lab else text.diag.panel <- paste(rep("V", nc),1:nc,sep="")
  oma <- if ("oma" %in% nmdots) dots$oma else NULL
  if (is.null(oma)) {
    oma <- c(4, 4, 4, 4)
    if (!is.null(main)) 
      oma[3L] <- 6
  }    
  #if (!is.null(main)) main="Pairwise marginal kernel density estimates of data"
  if(is.null(indcol)) indcol <- 1L:nc
  opar <- par(mfrow = c(length(indcol), length(indcol)), mar = rep(c(gap,gap/2),each=2),
 oma=oma)
  on.exit(par(opar))
  out <- list()
  count <- 1
  method <- match.arg(method,choices=c("contour","perspective","image"))
  for (i in indcol){ 
    for (j in indcol) 
      if(i==j) {
        plot(1,type="n",xlab="",ylab="", axes=FALSE)
        text(1,1,text.diag.panel[i],cex=2)
        box()} else {
          if (!is.null(data)) dataji <- data[,c(j,i)] else dataji<-data
          if (!is.null(eval.points)) eval.pointsji <- eval.points[,c(j,i)] else eval.pointsji<-eval.points
          if (!is.null(n.grid)) n.gridji <- n.grid[c(j,i)] else n.gridji<-n.grid
          if (!is.null(x@par$hx)) hxji <- matrix(x@par$hx, ncol=nc)[,c(j,i)] else hxji <-NULL 
          marg <- kepdf(x@x[,c(j,i)], kernel = x@kernel, bwtype=x@bwtype, h = x@par$h[c(j,i)], hx = hxji)
          out[[count]] <- localPlot(x=marg, eval.points = eval.pointsji,
                                    n.grid=n.gridji, data=dataji, xlab = "",
                                    ylab = "", main="", yaxt="n", xaxt="n",
                                    method=method, col=col, col.data=col.data,...)
          count <- count + 1     
          if (method!="perspective"){
            if(i==indcol[1]) {axis(3); if(j==length(indcol)) axis(4)}
            if(j==indcol[1]) {axis(2); if(i==length(indcol)) axis(1)}
            box()} #else if (i == j) {j=j+1}
        }
    count <- count + 1}			
  par(new = FALSE)
  if (!is.null(main)) {
    font.main <- if ("font.main" %in% nmdots) dots$font.main
    else par("font.main") 
    cex.main <- if ("cex.main" %in% nmdots) dots$cex.main
    else par("cex.main") 
    mtext(main, side=3, TRUE, line = 3, outer = TRUE, at = NA, cex = cex.main, font = font.main, adj= 0.5)
  }	
  invisible(out)
}

plot.kepdf <- function(x, y, eval.points = NULL, n.grid = NULL, data = NULL,
                       add = FALSE, main = NULL, xlab = NULL, ylab = NULL, zlab = NULL, col=NULL,
                       col.data=2, type="l", props = c(75,50,25), method="contour", 
                       ticktype = "detailed", indcol = NULL, text.diag.panel = NULL, 
                       gap = 0.5, ...){
  d <- NCOL(x@x)
  if (d==1) plot.kepdf1d(x, eval.points = eval.points, n.grid = n.grid,
                         data=data, add=add, main = main, xlab = xlab, ylab = ylab,
                         col= col, col.data = col.data, type=type, ...) else 
  if (d==2) plot.kepdf2d(x, eval.points = eval.points, n.grid = n.grid,
                         data=data, add=add, main=main, xlab=xlab, ylab=ylab, 
                         zlab=zlab, col=col, col.data = col.data, props = props, 
                         method=method, ticktype = ticktype,	...) else 
  plot.kepdfdd(x, eval.points = eval.points, n.grid = n.grid, data=data,
               main = main, props= props, method = method, indcol = indcol,
               text.diag.panel = text.diag.panel, gap = gap, col=col, col.data=col.data,...) 
}

setMethod("plot", signature(x="kepdf", y="missing"), plot.kepdf)		

	
##########################################################
##### functions, classes and methods related to clustering
##########################################################

setOldClass("dendrogram")  

setClass("pdfCluster", representation(call="call", x="matrix", pdf="list", 
	nc="list",  graph= "list", cluster.cores="ANY", tree="dendrogram", noc="numeric", stages="ANY", clusters="ANY"
	))


#library(geometry)

pdfCluster.data <- function (x, graphtype, hmult, Q="QJ", lambda = 0.10, grid.pairs = 10, n.grid=min(round((5 + sqrt(NROW(x)))*4), NROW(x)), ... ) 
{
 call<-match.call()
 x<-data.matrix(x)
 if(any(sapply(x[1,],is.factor))) stop("All variables must be numeric")
 if(any(apply(x,2,check.discrete)<10)) warning("One or more variables look to be discrete: pdfCluster is designed for continuous data")
 if(any(is.na(x))) {warning(cat("NA in object",deparse(substitute(x)), "have been omitted", "\n"));  x<- na.omit(x)}
  #manage dots
	dots <- list(...)
	ndots <- names(dots)
	args.kepdf <- ndots%in%(names(formals(kepdf)))
        dots.kepdf <- dots[args.kepdf]
        args.pdfClassification <- ndots%in%(names(formals(pdfClassification)))
        dots.pdfClassification <- dots[args.pdfClassification]
	#x <- data.matrix(x)  
	if (missing(hmult)) if (ncol(x)>6) hmult=1 else hmult=0.75          
	if ("h" %in% names(dots.kepdf)) dots.kepdf$h <-dots.kepdf$h*hmult else dots.kepdf$h <- hmult*h.norm(x)	
	pdf <- do.call(kepdf, c(list(x = x, eval.points = x), dots.kepdf))
	estimate <- pdf@estimate
        #check given arguments 
	hasQ<-hasArg(Q)
	haslambda<-hasArg(lambda)
	hasgrid.pairs<-hasArg(grid.pairs)
	n <- nrow(x)
	d <-ncol(x)
    if (n.grid > n) {
        warning("n.grid too large, set equal to n")
        n.grid <- min(n.grid, nrow(x))
    	}
	# build graph
	if (missing(graphtype)) if(d==1) graphtype ="unidimensional" else if (d <= 6) graphtype ="delaunay" else graphtype ="pairs"
	graph.par <- list()
	graph.par$type <- graphtype
	if (NCOL(x)==1) {
		if(hasQ) message("Unused argument 'Q' when graphtype= 'unidimensional'")
		if(haslambda) message("Unused argument 'lambda' when graphtype= 'unidimensional'")
		if(hasgrid.pairs) message("Unused argument 'grid.pairs' when graphtype= 'unidimensional'")
		graph.nb <- graph.uni(x) 
		} else  
	if (graphtype == "delaunay") {
		if(haslambda) message("Unused argument 'lambda' when graphtype= 'delaunay'")
		if(hasgrid.pairs) message("Unused argument 'grid.pairs' when graphtype= 'delaunay'")
      if(NCOL(x)> 6) warning("Running pdfCluster with graphtype='delaunay' may require a long time when the number of variables is so large")		
		graph.nb <- graph.del(x, Q = Q) 
		} else
	if (graphtype == "pairs") {
		if(hasQ) message("Unused argument 'Q' when graphtype= 'pairs'")
		graph.nb <- graph.pairs(x, pdf, lambda = lambda, grid.pairs=grid.pairs)
		graph.par$lambda = lambda; graph.par$comp.area <- graph.nb$comp.areas
		} else 
	stop("graphtype should be one of 'unidimensional', 'delaunay', or 'pairs'")
	nc <- num.con(x, estimate, graph.nb$graph, profile.grid = n.grid-2, correct=TRUE)  
  	    #determine connected components
        struct <- con2tree(nc, estimate)
    if (struct$bad) {
        message("No output given")
		      if (graphtype == "pairs") message ("The grid is too coarse: re-run with a larger value of one among 'n.grid', 'hmult' or 'lambda'. See the 'Warning' Section in 'help(pdfCluster)' for further details.") else message("The grid is too coarse: re-run with larger value of 'n.grid' or hmult. See the 'Warning' Section in 'help(pdfCluster)' for further details.")
      }
	    else {
    	g <- struct$g
        g[struct$g == 0] <- NA
		pdf.comp <-list()
		pdf.comp$kernel <- pdf@kernel
		pdf.comp$bwtype <- pdf@bwtype
		pdf.comp$par <- pdf@par
		pdf.comp$estimate <- estimate
		out <- new("pdfCluster", call = call, x = data.matrix(x), 
            pdf = pdf.comp,  nc = nc, graph = graph.par, cluster.cores = g, tree = struct$tree, noc = struct$noc, 
            stages = NULL, clusters = NULL)
        if ((!"n.stage" %in% ndots)||("n.stage" %in% ndots && dots.pdfClassification$n.stage > 0)) {
           out <- do.call(pdfClassification, c(list(obj = out), dots.pdfClassification))
}
        out
    }
}

# function to find clusters starting from a graph of data

#pdfCluster.graphpairs <- function (x, lambda = 0.10, n.grid=min(50, nrow(as.matrix(x@x))), ... ) 
pdfCluster.graphpairs <- function (x, graphtype, hmult, Q, lambda=0.10, grid.pairs, n.grid=min(round((5 + sqrt(NROW(x@x)))*4), NROW(x@x)), ...)
{
    #manage dots
	dots <- list(...)
	ndots <- names(dots)
        args.pdfClassification <- ndots%in%(names(formals(pdfClassification)))
        dots.pdfClassification <- dots[args.pdfClassification]
	#x is an object of class pdfCluster 
	#containing a graph built with graph.pairs
	if (n.grid > nrow(x@x)) {
        warning("n.grid too large, set equal to n")
        n.grid <- min(n.grid, nrow(x@x))
    }  
	# build graph
	if (x@graph$type!= "pairs") stop (cat(deparse(substitute(x)), "should contain a graph of type 'pairs'"))
	graph.nb <- area.to.graph(x@graph$comp.area, lambda = lambda)	
	graph.par <- list()
	graph.par$type <- graph.nb$type
	graph.par$comp.area <- graph.nb$comp.areas
	graph.par$lambda=lambda
	nc <- num.con(x@x, x@pdf$estimate, graph.nb$graph, profile.grid = n.grid-2, correct=TRUE)  
  	    #determine connected components
        struct <- con2tree(nc,x@pdf$estimate)
    if (struct$bad) {
        message("No output given")
		message ("The grid is too coarse: re-run with a larger value of one among 'n.grid', 'hmult' or 'lambda'. See the 'Warning' Section in 'help(pdfCluster)' for further details.")
    }
	    else {
    	g <- struct$g
        g[struct$g == 0] <- NA
		pdf.comp <-x@pdf
		out <- new("pdfCluster", call = match.call(), x = data.matrix(x@x), 
            pdf = pdf.comp,  nc = nc, graph = graph.par, cluster.cores = g, tree = struct$tree, noc = struct$noc, 
            stages = NULL, clusters = NULL)
        if ((!"n.stage" %in% ndots)||("n.stage" %in% ndots && dots.pdfClassification$n.stage > 0)) {
           out <- do.call(pdfClassification, c(list(obj = out), dots.pdfClassification))
}
        out
    }
}

#setGeneric("pdfCluster", function(x, graphtype, Q="QJ",   ...) standardGeneric("pdfCluster"))
#setMethod("pdfCluster", signature(x="matrix", graphtype = "ANY", Q = "ANY"), pdfCluster.data)
#setMethod("pdfCluster", signature(x="data.frame", graphtype = "ANY", Q = "ANY"), pdfCluster.data)
#setMethod("pdfCluster", signature(x="numeric", graphtype = "ANY", Q = "ANY"), pdfCluster.data)
#setMethod("pdfCluster", signature(x="pdfCluster", graphtype = "missing", Q = "missing"), pdfCluster.graphpairs)

##versione 1:0-0
#setGeneric("pdfCluster", function(x, graphtype, hmult, Q="QJ", lambda = 0.10, grid.pairs = 10, n.grid = min(round((6 + sqrt(NROW(x)))*4), NROW(x)),...) standardGeneric("pdfCluster"))
#setMethod("pdfCluster", signature(x="matrix", graphtype="ANY", hmult= "ANY", Q="ANY", lambda = "ANY", grid.pairs = "ANY", n.grid="ANY"), pdfCluster.data)
#setMethod("pdfCluster", signature(x="data.frame", graphtype="ANY", hmult= "ANY", Q="ANY", lambda = "ANY", grid.pairs = "ANY", n.grid="ANY"), pdfCluster.data)
#setMethod("pdfCluster", signature(x="numeric", graphtype="ANY", hmult= "ANY", Q="ANY", lambda = "ANY", grid.pairs = "ANY", n.grid="ANY"), pdfCluster.data)
#setMethod("pdfCluster", signature(x="pdfCluster", graphtype="missing", hmult= "ANY", Q="missing",lambda="ANY", grid.pairs="missing", n.grid="ANY"), pdfCluster.graphpairs)

##versione 1:0-1
#setGeneric("pdfCluster",function(x, graphtype, hmult, Q="QJ", lambda = 0.10, grid.pairs = 10, n.grid=min(round((6 + sqrt(NROW(x)))*4), NROW(x)),...) standardGeneric("pdfCluster"))
#setMethod("pdfCluster", signature(x="matrix", graphtype="character", hmult="numeric", Q="character", lambda = "numeric", grid.pairs = "numeric", n.grid="numeric"), pdfCluster.data)
#setMethod("pdfCluster", signature(x="data.frame", graphtype="character", hmult="numeric", Q="character", lambda = "numeric", grid.pairs = "numeric", n.grid="numeric"), pdfCluster.data)
#setMethod("pdfCluster", signature(x="numeric", graphtype="character", hmult="numeric", Q="character", lambda = "numeric", grid.pairs = "numeric", n.grid="numeric"), pdfCluster.data)
#setMethod("pdfCluster", signature(x="pdfCluster", graphtype="missing", hmult="missing", Q="missing",lambda="numeric", grid.pairs="missing", n.grid="numeric"), pdfCluster.graphpairs)

#versione 1:0-1
setGeneric("pdfCluster", function(x, graphtype, hmult, Q="QJ", lambda = 0.10, grid.pairs = 10, n.grid = min(round((5 + sqrt(NROW(x)))*4), NROW(x)),...) standardGeneric("pdfCluster"))
setMethod("pdfCluster", signature(x="matrix"), pdfCluster.data)
setMethod("pdfCluster", signature(x="data.frame"), pdfCluster.data)
setMethod("pdfCluster", signature(x="numeric"), pdfCluster.data)
setMethod("pdfCluster", signature(x="pdfCluster"), pdfCluster.graphpairs)

#setClass("summary.pdfCluster", representation(obj.class="character", cluster.cores="numeric", clusters="ANY", tree="dendrogram"))

#summary.pdfCluster <- function(object, ...){
summary.pdfCluster <- function(object,...){
    noc <-object@noc
    t.cluster.cores<-table(groups(object, stage=0),exclude=NULL)
    t.clusters<-table(groups(object))
    maxdigcc<-nchar(max(t.cluster.cores))	
    maxdigc<-nchar(max(t.clusters))	
    cat("An S4 object of class \"", class(object), "\"","\n","\n",sep="")
    cat("Call: ")
    print(object@call)
    cat("\n")
    cat("Initial groupings:","\n")
    cat(" label ", format(c(1:noc, NA), width=maxdigcc),"\n","count ", format(t.cluster.cores, width=maxdigcc),"\n")
    cat("\n")
	cat("Final groupings:","\n");
    cat(" label ", format(1:noc, width=maxdigc),"\n","count ", format(t.clusters, width=maxdigc),"\n")
	cat("\n")
	cat("Groups tree (here 'h' denotes 'height'):\n")
	str(object@tree)
	invisible(object)
}

setMethod("summary",signature("pdfCluster"), summary.pdfCluster)


setMethod("show",signature("pdfCluster"), function(object){
	cat("Clustering via nonparametric density estimation", "\n","\n")
	cat("Call: ")
    print(object@call)
    cat("\n")
	#h  <-  object@hmult*object@h
	#if(ncol(object@x)==1) 
	#cat("Smoothing parameter: ", h, "\n", "\n")
	#else 
	#cat("Diagonal elements of the smoothing matrix: ", h, "\n","\n")
	#cat("Density estimate at data points: ", "\n")
	#print(object@estimate)
	#cat("\n")
    cat("Kernel estimator type:\n")
		print(object@pdf$bwtype)
		cat("\n")
	cat("Graph type:\n")
	    print(object@graph$type)
		cat("\n")
	cat("Groups tree (here 'h' denotes 'height'):\n")
	str(object@tree)
	cat("\n")
  	cat("Initial groupings:", "\n")
      print(groups(object, stage=0))
    cat("\n")
	if(!is.null(groups(object))){
    if (length(object@stages)>1) for(i in seq(1:(length(object@stages)-1))){
	cat("Stage", i, "groupings:","\n")
    print(object@stages[[i]])
    cat("\n")}
	cat("Final groupings:", "\n")
      print(groups(object))
    cat("\n")
		}
   })

groups <- function(obj, stage=length(obj@stages)) {
 if(!is(obj, "pdfCluster")) stop("Function 'groups' extracts groups from objects of class 'pdfCluster'")
 if (stage==0) out <- obj@cluster.cores else {if (is.null(obj@stages)) out <- NULL else out <- obj@stages[[stage]]}
 out
	}

pdfClassification <- function (obj, n.stage = 5, se=TRUE, hcores=FALSE) 
{
    x <- obj@x
    lista <- list()
    g <- obj@cluster.cores
    n <- length(g)
    g[is.na(g)] <- 0
    M <- obj@noc
    kernel <- obj@pdf$kernel 
    if (obj@noc == 1) {
        obj@cluster.cores[g == 0] <- 1
        for (stage in 1:n.stage) lista[[stage]] <- rep(1, nrow(x))
    } else {
        stage <- 1
        while (stage <= n.stage) {
            unallocated <- which(g == 0)
			alp<-as.vector(table(g[g!=0]))/sum(g!=0)
            f <- matrix(NA, nrow = sum(g == 0), ncol = M)
			se2.logf <- matrix(NA, nrow = sum(g == 0), ncol = M)
            if (obj@pdf$bwtype == "fixed")
			{ 
                h0 <- obj@pdf$par$h
				if (hcores){
					for (m in 1:M) {
						indm <- which(g==m)
						h <- h0 * (1 + (stage)/n.stage)
						f[, m] <- kepdf(x = x[indm, ], kernel=kernel, bwtype="fixed", h = h, eval.points = matrix(x[g == 0, ], ncol=ncol(x)))@estimate
						se2.logf[, m] <- 1/(f[, m]*length(indm)*prod(h))	
					}
				} else
					for (m in 1:M) {
					indm <- which(g==m)
					h1 <- max(h.norm(x[g==m,]), 0.0001)
					h <-exp((1-alp[m])*log(h0)+(alp[m])*log(h1))
					f[, m] <- kepdf(x = x[g == m, ], kernel=kernel, bwtype="fixed", h = h, eval.points = matrix(x[g == 0, ], ncol=ncol(x)))@estimate
					se2.logf[, m] <- 1/(f[, m]*length(indm)*prod(h))	
					}				
			} else
			if (obj@pdf$bwtype == "adaptive") 
			{
				h0 <- obj@pdf$par$h 
				hx <- obj@pdf$par$hx
				alpha=obj@pdf$par$alpha
				if (hcores) {
					for (m in 1:M) {
						indm <- which(g==m)
						hx <- hx* (1 + (stage)/n.stage)
						f[, m] <- kepdf(x = x[indm, ], kernel=kernel, bwtype="adaptive", hx = hx[indm, ], eval.points = matrix(x[g == 0, ], ncol=ncol(x)))@estimate
						se2.logf[, m] <- 1/(f[, m]*length(indm)*prod(h0))	
					}
				} else 
					for (m in 1:M) {
						indm <- which(g==m)
						h0x<-hx[indm, ]
						h1x <- hprop2f(x=x[indm,], h=h.norm(x), alpha)
						hxmed <-exp((1-alp[m])*log(h0x)+alp[m]*log(h1x))
						f[, m] <- kepdf(x = x[indm, ], kernel=kernel, bwtype="adaptive", hx = hxmed, eval.points = matrix(x[g == 0, ], ncol=ncol(x)))@estimate
						se2.logf[, m] <- 1/(f[, m]*length(indm)*prod(h0))	
					}
			} 
            unalloc.outliers <- (apply(f, 1, sum) == 0)
            unallocated <- unallocated[!unalloc.outliers]
            f <- matrix(f[!unalloc.outliers, ], ncol = M)
            ind.max <- t(apply(f, 1, order)[rev(1:M), ])
            i1 <- ind.max[, 1]
            i2 <- ind.max[, 2]
            f1 <- diag(as.matrix(f[, i1]))
			f2 <- diag(as.matrix(f[, i2]))
            log.ratio <- log(f1) - log(f2)
            se2.logf1 <- diag(as.matrix(se2.logf[, i1])) 
            se2.logf2 <- diag(as.matrix(se2.logf[, i2]))
			se.log.ratio <- sqrt(se2.logf1 + se2.logf2)
			if (se) log.ratio.sc <- (log(f1) - log(f2))/se.log.ratio else log.ratio.sc <- log.ratio
			if (stage < n.stage) {
                alti <- (log.ratio.sc >= quantile(log.ratio.sc, (n.stage - 
                  stage)/n.stage,na.rm = TRUE))
            } else {
                alti <- rep(TRUE, (sum(g == 0) - sum(unalloc.outliers)))
            }
            g.alti <- ind.max[alti, 1]
            for (m in 1:M) {
                nuovi <- unallocated[alti][g.alti == m]
                g[nuovi] <- m
            }
            lista[[stage]] <- g
            if (sum(g == 0) == 0 & stage < n.stage) {
                message(paste("Classification accomplished in", 
                  stage, "stages"))
                n.stage <- stage
            }
            stage <- stage + 1
        }
        if (any(unalloc.outliers)) 
            warning(cat(sum(unalloc.outliers), "outliers have been found and not classified", 
                "\n"))
    }
    unlista <- unlist(lista)
    unlista[unlista == 0] <- NA
    obj@stages <- split(unlista, rep(1:n.stage, each = nrow(x)))
    obj@clusters <- lista[[n.stage]]
    obj
}

con2tree <- function(object, f){
  # object is the output of num.con()
  # f density estimate
  # 
  ow <- options("warn")
  nc <- object$nc
  p <- object$p
  index <- which(diff(nc) != 0)         # posizione punti di salto
  K <- length(index)  					# numero di punti di salto
  ps <- p[index]      					# frazione di dati inclusi ai vari punti di salto
  lista <- list()     					
  if (K == 1){
  gruppi <- as.vector(object$id[,ncol(object$id)])
  M <-1 } else {
  for(j in 1:K) lista[[j]] <-  as.vector(object$id[,index[j]])  # list of connected sets at the jumps
   gruppi <- rep(0,length(lista[[1]]))    
  M <- 0                                 
  insiemi <- list()
  # step
  k <- 1
  allocated <- (gruppi>0)
  insiemi[[k]] <- setdiff(unique(gruppi),0)
  # loop k=2,...,K
  while(k < K) {
    k <- k+1
    sets  <- lista[[k]]                #elenco connessi al salto k 
    insieme <- list()                   
    for(m in 1:max(sets)) {
      set <- which(sets==m)            
	#which objects are in connected component m?         
      new <- setdiff(set, which(allocated))    
	#which objects in connected component m are not allocated yet?
      if(length(new)>0){               
        #are there new objects in the connected component?  
        g.new <- unique(gruppi[intersect(set, which(allocated))]) 
	# which is the label group of objects in connected component m and has been already allocated?
        if(length(g.new) == 0)  gruppi[set] <- M <- M+1           
        if(length(g.new) == 1)  gruppi[set] <- g.new 
        allocated <- (gruppi>0)
        }
      gg <- sort(setdiff(unique(gruppi[set]),0))
      if(length(gg)>0) insieme[[length(insieme)+1]] <- gg 
    }
    insiemi[[k]] <- insieme
    }
  if(!missing(f)){
    u <- unique(gruppi[rev(order(f))])
    g <- rep(0,length(f))
    u0 <- u[u>0]
    for(i in 1:max(gruppi)) g[gruppi==u0[i]] <- i
    gruppi <- g
    } 
  }
  salti <- diff(nc)
  salta.giu <- rev(which(diff(nc)[index]<0))
  altezza <- numeric(M)
  m <- 0
  salti.su <- salti[salti>0]
       options(warn = -1)
  while(m <M){
    m <- m+1
    r <- min(which(cumsum(salti.su) >= m))
    altezza[m] <- p[salti>0][r]
  }
  bad.grid <- any(is.na(altezza))
  sotto.albero <- function(tree, k, set){
      insieme <- insiemi[[salta.giu[k]]]      
      r <- 0
      branch <- list()
      for(item0 in insieme){
        item <- intersect(set, unlist(item0))
        if(length(item)==1){
          r <- r+1
          u <- item
          attr(u,"members") <- 1
          attr(u,"height") <- altezza[item]
          attr(u,"label")  <- paste(as.character(item), " ", sep="")
          attr(u,"leaf")   <- TRUE
          branch[[r]] <- u
        }     
      if(length(item) > 1) {
        r <- r+1
        u <- sotto.albero(list(), k+1, item)
        attr(u,"members") <- length(unlist(item))
        attr(u,"height") <- max(ps[salta.giu[k+1]])
        attr(u,"label")  <- paste("{", paste(item, collapse=","),"}", sep="")
        attr(u,"leaf")   <- FALSE   
        branch[[r]] <- u
        # browser()
      }
      }
      branch
    }        
  if(M > 1) {
	tree <- sotto.albero(list(), 1, 1:M)
	attr(tree, "members") <- M
	attr(tree, "height") <-  max(ps[salta.giu[1]])
	attr(tree, "label") <- paste("{", paste(1:M, collapse=","),"}", sep="")
	noc <- M } else {
	#bassi.connect<- object$id[ ,(ncol(object$id)-1)] 
	# when there is one group only, cluester cores are all the allocated objects at the lower considered threshold of density
	#gruppi[bassi.connect>0] <- bassi.connect[bassi.connect>0] 
	tree <- list()
	tree[[1]]<-1
	attr(tree[[1]], "members") <- 1
	attr(tree[[1]], "height") <- 0
	attr(tree[[1]], "label") <- "1"
	attr(tree[[1]], "leaf") <- TRUE
    attr(tree,"members")<-1
	attr(tree,"height")<-1
	attr(tree,"label")<-paste("{", paste(1, collapse=","),"}", sep="")
	noc <- 1
	}
	tree <- list(tree)
	attr(tree, "members") <- M
	attr(tree, "height") <- 1
    attr(tree, "class") <- "dendrogram"
  options(warn = ow$warn)
  invisible(list(g=gruppi, tree=tree, bad=bad.grid, noc=noc))
 }
 
#####
plot.pdfCluster <- function(x, y, which=1:4, stage=Inf, pch=NULL, col=NULL, ...){
  if (is.element(4, which) & x@noc==1) which <- setdiff(which, 4)
  w12 <- sort(which)[1:min(2,length(which))]
  if(setequal(1:2,w12)) par(mfrow=c(1,2))
  if(is.element(1, which)){
     nc <- x@nc$nc
     p <- x@nc$p
     plot(c(0,p,1,1), c(0,nc,nc[length(nc)],0), 
           ylim=c(0,max(nc)), yaxt="n", type="S", cex=0.7,
           xlab="fraction of data points",
           ylab="mode function", ...)
     axis(2,at=c(0:max(nc)))
  }
  if(is.element(2, which)) {
    plot(x@tree, center=TRUE, ...)
    title(sub="groups tree")
    }
  if(setequal(1:2,w12)) par(mfrow=c(1,1))
 # if(length(setdiff(which,w12)) > 0) {
  #if(length(setdiff(w12,c(3,4))) > 0 ) {
 #    cat("press <enter> to continue...")
 #    readline()
 #  }
  if(is.element(3, which))    {
    #browser()  
	if(sum(which<3)) {
     cat("press <enter> to continue...")
     readline()
   }
    stage <- min(stage, length(x@stages))
    if(stage==0) {g <- x@cluster.cores; g[is.na(x@cluster.cores)] <- 0}
    else {g <- x@stages[[stage]]; g[is.na(x@stages[[stage]])] <- 0} 
	dat <- x@x
    M <-  length(table(g))
    if (is.null(pch)) #|(length(pch) < M & length(pch) > 1)) {
	pch <- as.character(g)
	if (length(pch) < M & length(pch) > 1) warning("pch argument should have length 1 or equal to the number of clusters")
	if(length(pch) > 1){ 
	newpch <- as.factor(g)
	levels(newpch) <- rep(levels(as.factor(pch)), length(levels(as.factor(pch))))[1:M]
	if (is.numeric(pch)) pch <- as.numeric(as.character(newpch)) else pch <- as.character(newpch)
	} 
	if (is.null(col)){#|(length(col) < M & length(col) > 1)) {
	col=g 
	if (0 %in% g) {
	g0<- g
	g0[g0==0]<-x@noc+1	
	col=g0
	}	
	} 
	if (length(col) < M & length(col) > 1) warning("col argument should have length 1 or equal to the number of clusters")
	if(length(col) > 1){ 
	newc <- col
	newc <- as.factor(newc)
	levels(newc) <- rep(levels(as.factor(col)), length(levels(as.factor(col))))[1:M]
	if (is.numeric(col)) col <- as.numeric(as.character(newc)) else col <- as.character(newc)
	}
	if(ncol(dat)==1){
	if(is.null(list(...)$add))  plot(dat[,1], pch=pch, col=col, cex=.75, ylab="x", ...)
#	if(is.null(list(...)$add))  plot(dat[,1], type="n", ...)
#    for(m in 0:M) {
#      gr <- (g==m)
#      if(m==0) points(seq(along=x)[gr], dat[gr,],  pch=0, cex=0.2)
#      else points(seq(along=x)[gr], dat[gr,], pch=m, col=m+1, cex=0.75)
#      } 
	} else if (ncol(dat)==2){
if(is.null(lab <- colnames(dat))) {lab <- paste(rep("V", ncol(dat)), 1:ncol(dat), sep="")}	
if(is.null(list(...)$add))  plot(dat, pch=pch, col=col, xlab=lab[1], ylab=lab[2], cex=.75, ...)
#	if(is.null(list(...)$add))  plot(dat[,coord], type="n", ...)
#    for(m in 0:M) {
#      gr <- (g==m)
#      if(m==0) {pch=pch[1]; points(dat[gr,coord],  pch=pch , cex=0.2)}
#      else points(dat[gr,coord], pch=pch[m], col=m, cex=0.75)
#      }
	  } else pairs(dat, pch=pch, col=col, ...)
  }
    if(is.element(4, which))    {
if(sum(which<4)) {
     cat("press <enter> to continue...")
     readline()
   }
   plot(dbs(x))
   }
  #browser()
  invisible()
}

setMethod("plot",signature(x="pdfCluster", y="missing"), plot.pdfCluster)

###############################################################################
##### functions, classes and methods related to graph building and CC detection
###############################################################################
  
graph.del <- function(x, Q="QJ") {
   # ----------------------------------------------------------------------
   # Crea la triangolazione e elenca i VICINI PER OGNI PUNTO
 
   # REQUIRES geometry
   # Options implemented for qdelaunay.exe:
   # Qt  - all regions are simplicial (in 2D triangles)
   # Qj  - joggle input all regions are simplicial (in 2D triangles)
   # Fv  - output is a list of points of each triangle
 
   # OUTPUT - for each point, list of connected points
   # ----------------------------------------------------------------------
   x <- as.matrix(x)
   x0 <- x
   sd <- sqrt(diag(var(x)))
   m  <- apply(x,2,mean)
   x.scaled <- t((t(x)-m)/sd)
   ncx <- dim(x.scaled)[2]
   nrx <- dim(x.scaled)[1]
   if (Q=="QT") xt <- delaunayn(x.scaled, options="Qt")
   if (Q=="QJ") xt <- delaunayn(x.scaled, options="QJ")
   on.exit(unlink("qhull_out.txt"))
   x.nb <- vector(mode="list", length=nrx)
   for (i in 1:nrx) {
		x.nb[[i]] <- as.integer(sort(unique(as.vector(xt[(xt==i)%*%rep(1,ncx+1)>0,]), FALSE))) 
	}
   rownames <- as.character(1:length(x.nb))
   list(graph = x.nb, type="delaunay") 
	}
  
#GM 15/03: forse serve ordinare la lista?
graph.uni <- function(x) {
	n <- NROW(x)
	x.nb <- cbind(1:n,1:n,1:n)
	x.nb[order(x)[1:(n-1)],2]<-order(x)[2:n]
	x.nb[order(x)[2:n],3]<-order(x)[(1:(n-1))]
	x.nb <-apply(x.nb,1,unique)
	list(graph = x.nb, type="unidimensional") 
	}

graph.pairs <- function(x, xd, lambda = 0.10, grid.pairs = 10) {
    graph.build <-function(val, ind, pos) sort(c(pos, val[which(ind==pos)], ind[which(val==pos)]))
	comp.areas<-matrix.connected.C(x, xd, grid.pairs = grid.pairs)
	ind.novalley <- which(comp.areas$area <= lambda) 
	x.nb <-lapply(c(1:nrow(x)), graph.build, val=comp.areas$pairs.ord[2,][ind.novalley],ind=comp.areas$pairs.ord[1,][ind.novalley])
	list(graph = x.nb, type="pairs", comp.areas = comp.areas) 
	}	

area.to.graph <- function(comp.areas, lambda = 0.10)	{
	graph.build <-function(val, ind, pos) sort(c(pos, val[which(ind==pos)], ind[which(val==pos)]))
	ind.novalley <- which(comp.areas$area <= lambda)  
	x.nb <-lapply(c(1:max(comp.areas$pairs.ord)), graph.build, val=comp.areas$pairs.ord[2,][ind.novalley],ind=comp.areas$pairs.ord[1,][ind.novalley])
	list(graph = x.nb, type="pairs", comp.areas = comp.areas) 
	}

	
valley.measureC <- function(F, output.complete=FALSE){
	RES <- .C("valley_measure", area=double(1), F0valley=as.double(F), npoints=as.integer(length(F)), PACKAGE="pdfCluster")
	if(output.complete) out <- list(area = RES$area, F0valley = RES$F0valley, F = F) else out=RES$area 
	out
}

apply_valley_measureC <- function(F){
	N <- nrow(F)
	n <- ncol(F)
	.C("apply_valley_measure",	aree=double(N), as.double(t(F)), as.integer(n), as.integer(N), PACKAGE="pdfCluster")[[1]]
}

matrix.connected.C <- function(x, pdf, grid.pairs=10, output.complete=FALSE ){
	n<-nrow(x)
	d<-ncol(x)
	pairs.ord <- combn(1:n,2)
	alpha<-seq(0,1,length=grid.pairs)
	#set a matrix as follows: nrow = n*grid.pairs ncol=d
	#each k-ple of grid.pairs rows is a set of points along the segment joining a pair
	#pairs indicated the order--> 12 13 14 15...23 24 25...34 35...
	new <- as.vector(x[t(pairs.ord)[,1],])%*%t(alpha)+as.vector(x[t(pairs.ord)[,2],])%*%t(1-alpha)
	new <- matrix(as.vector(t(new)),ncol=d)
	F <- kepdf(pdf@x, bwtype=pdf@bwtype, kernel=pdf@kernel,eval.points=new, h=pdf@par$h, hx = pdf@par$hx)@estimate
	F <- matrix(F,ncol=grid.pairs,byrow=T)
	res <- apply_valley_measureC(F)
	list( area = res, pairs.ord = pairs.ord)
}	

num.con<-function (x, estimate, x.nb, pn = 0.9, profile = TRUE, profile.grid = 25, correct=FALSE) {
    n <- length(estimate)
    ngrid <- profile.grid
    if (profile) pn <- seq(0, 1, length = ngrid + 2)[-c(1, ngrid + 2)]
    qn <- as.numeric(quantile(estimate, 1 - pn))
    xd.id <- (matrix(estimate, ncol = 1) %*% (1/qn)) < 1 # TRUE if f < quantile 
    nc.nc <- rep(0, length(qn))
    names(nc.nc) <- format(pn, digit = 3)
    nc.id <- matrix(0, nrow = n, ncol = length(qn))   #14/03 e quindi anche nc.id e nc.id1 al momento
    colnames(nc.id) <- format(pn, digit = 3)
    for (i in 1:length(qn)) {
        ni <- sum(xd.id[, i])
        if (ni < n) {
            x.nbc <- x.nb
            ind <- which(!xd.id[, i])
            for (j in ind) x.nbc[[j]] <- intersect(x.nb[[j]], 
                ind)
            x.nbc[xd.id[, i]] <- as.integer(0)
            nc <- find.nc(x.nbc)
            nc.nc[i] <- nc$nc - ni
            nc.id[xd.id[, i], i] <- -1
            nc.id[!xd.id[, i], i] <- unclass(factor(nc$comp.id[!xd.id[, i]]))
        }
        if (ni == n) {
            nc.nc[i] <- 0
            nc.id[, i] <- -1
        }
        if (correct) {
            tt <- table(nc.id[nc.id[, i] != -1, i])
            ttc <- as.integer(names(tt)[tt <= 1])
            ttp <- as.integer(names(tt)[tt > 1])
            if (length(ttc) > 0) 
                for (l in 1:length(ttc)) nc.id[nc.id[, i] == 
                  ttc[l], i] <- -1
            nc.nc[i] <- length(ttp)
            nc.nc[i] <- length(tt)
        }
        nc.nc[i] <- length(unique(nc.id[, i])) - 1
    }
	#correction 14/06/2011
	if (profile) {
		newnames <- c("0",names(nc.nc),"1")
		nc.nc <- c(0,nc.nc,1)
		names(nc.nc) <- newnames
		nc.id <- cbind(rep(-1,nrow(nc.id)),nc.id,rep(1,nrow(nc.id)))
		pn <- c(0,pn,1)
		qn <- c(max(estimate),qn,min(estimate))
		}
    list(nc = nc.nc, p = pn, id = nc.id, pq = cbind(p = pn, q = qn))
}

 find.nc <- function (nb.obj) 
{
    comp <- integer(length(nb.obj))
    #comp <- .Call("g_components", nb.obj, as.integer(comp), PACKAGE="pdfCluster")
    comp <- .Call("g_components", nb.obj, as.integer(comp), PACKAGE="pdfCluster")
    answ <- list(nc = length(unique(comp)), comp.id = comp)
    answ
}


###################################################
##### functions, classes and methods related to dbs
###################################################


setClass("dbs", representation(call="call", x="matrix", prior="numeric", dbs="numeric", clusters="numeric", noc="numeric", stage="ANY"))


dbs.cluster <- function(x, clusters, h.funct="h.norm", hmult = 1, prior, ...){
	x <- as.matrix(x)
	h.fn <- get(h.funct, inherits = TRUE)
	M <- max(clusters)
	if(M==1) stop("dbs can be computed for 2 groups at least")
	lik.fine <- matrix(NA,nrow=nrow(x),ncol=M)
	if(missing(prior)) prior <- rep(1/M,M) 
	for(m in 1:M){
		h=(h.fn(x=x[clusters==m,]))
		if (sum(h==0)>0) h[h==0]<-1e-10 
		lik.fine[,m] <- kepdf(x=x[clusters==m,],h=h*hmult, eval.points=x, ...)@estimate
		}
	ind <- t(t(lik.fine)*prior)
	den <- apply(ind,1,sum)
	tau_m <- ind/den
	ordered <- t(apply(tau_m,1,order,decreasing = T))
	ind.ug <- which(clusters==ordered[,1])
	dbs.ind <- diag(log(tau_m[,clusters]/tau_m[,ordered[,1]]))
	dbs.ind[ind.ug] <- diag(log(tau_m[ind.ug,ordered[ind.ug,1]]/ind[ind.ug,ordered[ind.ug,2]]))
	dbs.ind[is.infinite(dbs.ind)]<-max(dbs.ind[is.finite(dbs.ind)])+1
	new("dbs", call=match.call(), x=x, prior=prior, dbs=dbs.ind/max(dbs.ind), clusters=clusters, noc=M, stage=NULL)
	}

dbs.pdfCluster <- function(x, h.funct="h.norm", hmult=1, prior=as.vector(table(x@cluster.cores)/sum(table(x@cluster.cores))), stage=NULL, ...){
	if(is.null(stage)) stage <- length(x@stages)
	if (stage == 0) {
		ind.all <- which(!is.na(x@cluster.cores))
		clusters <- x@cluster.cores[ind.all] 
		} else {
		ind.all <- which(!is.na(x@stages[[stage]]))
		clusters <- x@stages[[stage]][ind.all] 
		}
	y <- as.matrix((as.matrix(x@x))[ind.all,])
	h.fn <- get(h.funct, inherits = TRUE)
	M <- max(clusters)
	if(M==1) stop("dbs can be computed for 2 groups at least")
	lik.fine <- matrix(NA,nrow=nrow(y),ncol=M)
	if(missing(prior)) prior <- rep(1/M,M) 
	for(m in 1:M){
		h=(h.fn(x=y[clusters==m,]))
		if (sum(h==0)>0) h[h==0] <- 1e-10 
		lik.fine[, m] <- kepdf(x=y[clusters==m, ],h=h*hmult, eval.points=y, ...)@estimate
		}
	ind <- t(t(lik.fine)*prior)
	den <- apply(ind,1,sum)
	tau_m <- ind/den
	ordered <- t(apply(tau_m,1,order,decreasing = T))
	ind.ug <- which(clusters==ordered[,1])
	dbs.ind <- diag(log(tau_m[,clusters]/tau_m[,ordered[,1]]))
	dbs.ind[ind.ug] <- diag(log(tau_m[ind.ug,ordered[ind.ug,1]]/ind[ind.ug,ordered[ind.ug,2]]))
	dbs.ind[is.infinite(dbs.ind)]<-max(dbs.ind[is.finite(dbs.ind)])+1
	dbs.all <- rep(NA, length(x@cluster.cores))
	dbs.all[ind.all] <- dbs.ind/max(dbs.ind)
	cl <- rep(NA, length(x@cluster.cores))
	cl[ind.all] <- clusters
	new("dbs", call=match.call(), x=x@x, prior=prior, dbs=dbs.all, clusters=cl, noc=M, stage=stage)
	}	

setGeneric("dbs", function(x, clusters, h.funct="h.norm", hmult=1, prior, ...) standardGeneric("dbs"))
setMethod("dbs", signature(x="matrix", clusters="numeric"), dbs.cluster)
setMethod("dbs", signature(x="pdfCluster",clusters="missing"), dbs.pdfCluster)
	
plot.dbs <- function(x, y, xlab="", ylab="", col= NULL, lwd =3, cex=.9, cex.axis=.5, main=NULL, labels=FALSE,...){
	dbs=x@dbs
	noc=x@noc
	if (is.null(col)) cl <- 1:noc else cl=rep(col,noc)
	if (is.null(main)) main="dbs plot" 
	plot(0,axes=F, xlim=c(min(0, min(dbs, na.rm=T)),1), ylim=c(0,sum(!is.na(dbs))),type="n",ylab=ylab, xlab=xlab, main=main, ...)
	ini <- 0
	#lb <- 0
	pos <- numeric(noc)
	lab <- numeric(0)
	ind <- numeric(0)
	for(m in 1:noc){
		lab.gr <- which(x@clusters==m)
		ind.gr <- dbs[lab.gr]
		lab <- c(lab,lab.gr[order(ind.gr)])
		ind.gr <- c(sort(ind.gr))
		ind <- c(ind,ind.gr)
		median.gr <- median(ind.gr)
		fin <- length(lab)
		ini <- (fin-(length(lab.gr)))+1
		segments(rep(0,(length(lab.gr))), ini:fin,ind.gr,ini:fin,lwd=lwd,col=cl[m])
		text(0.65,(ini+fin)/2, paste("cluster median dbs = ", round(median.gr,2)), cex=cex,pos=4,xpd=T)
		#segments(0,ini,0,fin,lwd=4,col=cl[m])
		}
	axis(1, seq(round(min(0, min(dbs, na.rm=T)), 1), 1, by=.1), seq(round(min(0, min(dbs, na.rm=T)), 1), 1, by=.1), cex.axis=cex.axis)
	if (labels) axis(2, at=(1:sum(!is.na(dbs))), lab, cex.axis=cex.axis)
	}

setMethod("plot", signature(x="dbs",y="missing"),  plot.dbs)

summary.dbs <- function(object, ...){
	cat("An S4 object of class", class(object), "\n", "\n")
	cat("Density based silhouette summary of cluster", "\n")
	dbs.groups <- list()
	for(m in 1:max(object@clusters,na.rm=T)){
		ind <- which(object@clusters==m)
		dbs.groups[[m]] <- object@dbs[ind]
		names(dbs.groups[[m]]) <- ind
		cat(m, ":", "\n")
		print(summary(dbs.groups[[m]]))
		cat("\n")
		}
	cat("Density based silhouette summary of data", "\n")
	print(summary(object@dbs))
	invisible(object)
	}

setMethod("summary", "dbs", summary.dbs)



	
setMethod("show", "dbs", function(object){
	cat("Density-based silhouette ", "\n", "\n")
	cat("dbs index: ", "\n")
	print(round(object@dbs,digits=4))
	cat("\n")
	cat("clusters: ", "\n")
	print(object@clusters)
	})






###############################
#other functions


adj.rand.index <- function (cl1, cl2){
	tab <- table(cl1, cl2)
	f2 <- function(n) n*(n-1)/2
	sum.f2 <- function(v) sum(f2(v))
	marg.1 <- apply(tab,1,sum)
	marg.2 <- apply(tab,2,sum)  
	n	<- sum(tab)
	prod <- sum.f2(marg.1)*sum.f2(marg.2)/f2(n)
	num <- (sum.f2(as.vector(tab))- prod)
	den <- 0.5*(sum.f2(marg.1)+sum.f2(marg.2))-prod
	num/den
}


check.discrete <-function (vec) length(unique(vec))
Any scripts or data that you put into this service are public.
pdfCluster documentation built on Dec. 2, 2022, 5:14 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
pdfCluster
Cluster Analysis via Nonparametric Density Estimation

R/pdfCluster.R
In pdfCluster: Cluster Analysis via Nonparametric Density Estimation

Defines functions summary.dbs plot.dbs dbs.pdfCluster dbs.cluster matrix.connected.C apply_valley_measureC valley.measureC area.to.graph graph.pairs graph.uni graph.del plot.pdfCluster con2tree groups summary.pdfCluster plot.kepdf plot.kepdf1d hprop2f h.norm

Documented in dbs.cluster dbs.pdfCluster groups h.norm hprop2f plot.dbs plot.kepdf plot.kepdf1d plot.pdfCluster summary.dbs summary.pdfCluster

Try the pdfCluster package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

pdfCluster Cluster Analysis via Nonparametric Density Estimation

R/pdfCluster.R In pdfCluster: Cluster Analysis via Nonparametric Density Estimation

Defines functions summary.dbs plot.dbs dbs.pdfCluster dbs.cluster matrix.connected.C apply_valley_measureC valley.measureC area.to.graph graph.pairs graph.uni graph.del plot.pdfCluster con2tree groups summary.pdfCluster plot.kepdf plot.kepdf1d hprop2f h.norm

Documented in dbs.cluster dbs.pdfCluster groups h.norm hprop2f plot.dbs plot.kepdf plot.kepdf1d plot.pdfCluster summary.dbs summary.pdfCluster

Try the pdfCluster package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

pdfCluster
Cluster Analysis via Nonparametric Density Estimation

R/pdfCluster.R
In pdfCluster: Cluster Analysis via Nonparametric Density Estimation