R/visual.R
In clusterGeneration: Random Cluster Generation (with Specified Degree of Separation)

Documented in plot1DProjection plot2DProjection viewClusters

# v1.2.5
#  (1) fixed a bug in the function 'plot2DProjection'. The 'lim' should be
#    'ylim' 
#
# plot separation plane a'(y-eta)=0, i.e. a'y-b=0, where b=a'eta.
# y1 -- cluster 1. Rows correspond to observations. 
#                  Columns correspond to variables
# y2 -- cluster 2. Rows correspond to observations. 
#                  Columns correspond to variables
# projDir -- projection direction
# bw -- the smoothing bandwidth to be used by the R function density()
# alpha -- tuning parameter
# xlab, ylab -- labels of x and y coordinates
# title -- title of the plot
plot1DProjection<-function(
			   y1, 
			   y2, 
			   projDir, 
                           sepValMethod = c("normal", "quantile"), 
			   bw = "nrd0", 
                           xlim = NULL, 
			   ylim = NULL, 
                           xlab = "1-D projected clusters", 
			   ylab = "density estimates", 
                           title = "1-D Projected Clusters and their density estimates",
                           font = 2, 
			   font.lab = 2, 
			   cex = 1.2, 
			   cex.lab = 1.2, 
			   cex.main = 1.5,
                           lwd = 4, 
			   lty1 = 1, 
			   lty2 = 2, 
			   pch1 = 18, 
			   pch2 = 19, 
			   col1 = 2, 
			   col2 = 4, 
			   type = "l",
                           alpha = 0.05, 
			   eps = 1.0e-10, 
			   quiet = TRUE)
{ 
  sepValMethod<-match.arg(sepValMethod, choices=c("normal", "quantile"))
  if(alpha<=0 || alpha>0.5)
  {
    stop("The tuning parameter 'alpha' should be in the range (0, 0.5]!\n")
  }
  if(eps<=0 || eps > 0.01)
  {
    stop("The convergence threshold 'eps' should be between (0, 0.01]!\n")
  }

  x1<-y1%*%projDir
  x2<-y2%*%projDir
  x<-c(x1, x2)
  n1<-length(x1)
  n2<-length(x2)
  n<-n1+n2

  m1<-mean(x1, na.rm=TRUE)
  m2<-mean(x2, na.rm=TRUE)
  if(m1>m2)
  { 
    projDir<- -projDir
    x1<- -x1
    x2<- -x2
    if(!quiet)
    { cat("Warning: Projected cluster 1 is on the right-hand side of projected cluster 2!\n")
     cat("'projDir' is replaced by '-projDir'!\n")
    }
  } 

  x<-c(x1, x2)
  n1<-length(x1)
  n2<-length(x2)

  xx1<-x1
  xx2<-x2
  
  if(sepValMethod=="quantile")
  { if(n1>1) 
    { L1<-quantile(xx1, prob=alpha/2, na.rm=TRUE)
      U1<-quantile(xx1, prob=1-alpha/2, na.rm=TRUE)
    } 
    else 
    { L1<-xx1
      U1<-xx1
    }
    if(n2>1)
    { L2<-quantile(xx2, prob=alpha/2, na.rm=TRUE)
      U2<-quantile(xx2, prob=1-alpha/2, na.rm=TRUE)
    } 
    else 
    { L2<-xx2
      U2<-xx2
    }
  } 
  else 
  { za<-qnorm(1-alpha/2)
    if(n1>1)
    { m1<-mean(xx1, na.rm=TRUE)
      sd1<-sd(c(xx1), na.rm=TRUE)
    } 
    else 
    { m1<-xx1
      sd1<-0
    }
    if(n2>1)
    { m2<-mean(xx2, na.rm=TRUE)
      sd2<-sd(c(xx2), na.rm=TRUE)
    } 
    else 
    { m2<-xx2
      sd2<-0
    }
    L1<-m1-za*sd1
    U1<-m1+za*sd1
    L2<-m2-za*sd2
    U2<-m2+za*sd2
  }

  numer<-(L2-U1)
  denom<-(U2-L1)
  if(abs(denom)<eps) # denominator equal to zero
  { sepVal<- -1 } 
  else { sepVal<-numer/denom }
  
  xlab<-paste(xlab," (sepVal=",round(sepVal,2),", alpha=", round(alpha,2), ")",sep="")

  # obtain density estimation
  if(n1>1)
  { tmp1<-density(xx1, bw=bw)
    tmpx1<-tmp1$x
    tmpy1<-tmp1$y
  } 
  else { tmpx1<-xx1; tmpy1<-1; }
  if(n2>1)
  { tmp2<-density(xx2, bw=bw)
    tmpx2<-tmp2$x
    tmpy2<-tmp2$y
  } 
  else  
  { tmpx2<-xx2
    tmpy2<-1; 
  }
  nn1<-length(tmpx1)
  nn2<-length(tmpx2)

  if(is.null(xlim))
  { xlim<-range(c(xx1, xx2, tmpx1, tmpx2), na.rm=TRUE) }
  if(is.null(ylim))
  { ylim<-range(c(tmpy1, tmpy2, tmpy1, tmpy2), na.rm=TRUE) }


  plotCluster(
	      nn1 = nn1, 
	      nn2 = nn2, 
	      tmpx1 = tmpx1, 
	      tmpy1 = tmpy1, 
	      tmpx2 = tmpx2, 
	      tmpy2 = tmpy2, 
              xlim = xlim, 
	      ylim = ylim, 
	      xlab = xlab, 
	      ylab = ylab, 
	      title = title, 
              font = font, 
	      font.lab = font.lab, 
	      cex = cex, 
	      cex.lab = cex.lab, 
	      cex.main = cex.main, 
	      lwd = lwd, 
              lty1 = lty1, 
	      lty2 = lty2, 
	      pch1 = pch1, 
	      pch2 = pch2, 
	      col1 = col1, 
	      col2 = col2, 
	      type = type)
 
  plotTickLabel(
		x1 = xx1, 
		x2 = xx2, 
		L1 = L1,
		U1 = U1,
		L2 = L2,
		U2 = U2, 
		axis = 1, 
		font = font,
                lwd = lwd, 
		lty1 = lty1, 
		lty2 = lty2, 
		col1 = col1, 
		col2 = col2) #ticks and labels
  invisible(list(sepVal=sepVal, projDir=projDir))
}

# function called by plot1DProjection and plot2DProjection
plotCluster<-function(
		      nn1, 
		      nn2, 
		      tmpx1, 
		      tmpy1, 
		      tmpx2, 
		      tmpy2, 
                      xlim, 
		      ylim, 
		      xlab, 
		      ylab, 
		      title, 
                      font, 
		      font.lab, 
		      cex, 
		      cex.lab, 
		      cex.main, 
		      lwd, 
                      lty1, 
		      lty2, 
		      pch1, 
		      pch2, 
		      col1, 
		      col2, 
		      type = "l")
{
  if(nn1>1 && nn2>1)
  { plot(tmpx1, tmpy1, type=type, 
         xlim=xlim, ylim=ylim, 
         xlab=xlab, ylab=ylab,
         col=col1, font=font, cex=cex,
         font.lab=font.lab, cex.lab=cex.lab, lwd=lwd, lty=lty1, pch=pch1) 
    title(title, cex.main=cex.main)
    points(tmpx2, tmpy2, type=type, col=col2, lwd=lwd, lty=lty2, pch=pch2)
  } 
  else if (nn1>1 && nn2==1) 
  { plot(tmpx1, tmpy1, type=type, 
         xlim=xlim, ylim=ylim, 
         xlab=xlab, ylab=ylab,
         col=col1, font=font, cex=cex,
         font.lab=font.lab, cex.lab=cex.lab, lwd=lwd, lty=lty1, pch=pch1) 
    title(title, cex.main=cex.main)
    points(tmpx2, tmpy2, type="p", col=col2, lty=lty2, pch=pch2) 
  }  
  else if (nn1==1 && nn2>1) 
  { plot(tmpx2, tmpy2, type=type, 
         xlim=xlim, ylim=ylim, 
         xlab=xlab, ylab=ylab,
         col=col2, font=font, cex=cex,
         font.lab=font.lab, cex.lab=cex.lab, lwd=lwd, lty=lty1, pch=pch1) 
    title(title, cex.main=cex.main)
    points(tmpx1, tmpy1, type="p", col=col1, lty=lty2, pch=pch2) 
  } 
  else 
  { plot(x=c(tmpx1), y=c(tmpy1), type="p", 
         xlim=xlim, ylim=ylim, 
         xlab=xlab, ylab=ylab,
         col=col1, font=font, cex=cex,
         font.lab=font.lab, cex.lab=cex.lab, lty=lty1, pch=pch1) 
    title(title, cex.main=cex.main)
    points(x=c(tmpx2), y=c(tmpy2), type="p", col=col2, lty=lty2, pch=pch2) 
  }
}

# Plot ticks and labels along rotated 1st axis
# x1, x2 -- projected cluster 1 and 2 along the 1st axis
# L1, U1 -- lower and upper 1-alpha/2 percentile of x1
# L2, U2 -- lower and upper 1-alpha/2 percentile of x2
plotTickLabel<-function(
			x1, 
			x2, 
			L1, 
			U1, 
			L2, 
			U2, 
			axis = 1, 
			font = 2, 
			lwd = 4,
                        lty1 = 1, 
			lty2 = 2, 
			col1 = 2, 
			col2 = 4)
{ axis(side = axis,at = x1, labels = FALSE, tick = TRUE,col = col1)
  axis(side=axis,at=x2, labels=FALSE, tick=TRUE,col=col2)
  par(mgp=c(3,2,0))
  axis(side=axis,at=c(L1, U1), labels=c("L1", "U1"), tick=TRUE,col=col1,
    lwd=lwd,font=font, lty=lty1)
  axis(side=axis,at=c(L2, U2), labels=c("L2", "U2"), tick=TRUE,col=col2,
    lwd=lwd,font=font, lty=lty2)
  par(mgp=c(3,1,0))
}

# plot 2-D projection
# sepValMethod -- quantile or normal 
#   quantile means use upper and lower alpha quantiles of univariate projections
#   normal means use mean +/- z(alpha)*sd for mean,sd of projections
# y1 -- cluster 1
# y2 -- cluster 2
# alpha -- tuning parameter
# xlab, ylab -- labels of x and y coordinates
# title -- title of the plot
plot2DProjection<-function(
			   y1, 
			   y2, 
			   projDir, 
                           sepValMethod = c("normal", "quantile"), 
                           iniProjDirMethod = c("SL", "naive"), 
                           projDirMethod = c("newton", "fixedpoint"), 
                           xlim = NULL, 
			   ylim = NULL, 
                           xlab = "1st projection direction", 
                           ylab = "2nd projection direction", 
                           title = "Scatter plot of 2-D Projected Clusters",
                           font = 2, 
			   font.lab = 2, 
			   cex = 1.2, 
			   cex.lab = 1, 
			   cex.main = 1.5,
                           lwd = 4, 
			   lty1 = 1, 
			   lty2 = 2, 
			   pch1 = 18, 
			   pch2 = 19, 
			   col1 = 2, 
			   col2 = 4, 
                           alpha = 0.05, 
			   ITMAX = 20, 
			   eps = 1.0e-10, 
			   quiet = TRUE)
{ 
  sepValMethod<-match.arg(sepValMethod, choices=c("normal", "quantile"))
  iniProjDirMethod<-match.arg(arg=iniProjDirMethod, choices=c("SL", "naive"))
  projDirMethod<-match.arg(arg=projDirMethod, choices=c("newton", "fixedpoint"))
  if(alpha<=0 || alpha>0.5)
  {
    stop("The tuning parameter 'alpha' should be in the range (0, 0.5]!\n")
  }
  ITMAX<-as.integer(ITMAX)
  if(ITMAX<=0 || !is.integer(ITMAX))
  {
    stop("The maximum iteration number allowed 'ITMAX' should be a positive integer!\n")
  }
  if(eps<=0 || eps > 0.01)
  {
    stop("The convergence threshold 'eps' should be in (0, 0.01]!\n")
  }
  if(!is.logical(quiet))
  {
    stop("The value of the quiet indicator 'quiet' should be logical, i.e., either 'TRUE' or 'FALSE'!\n")
  }

  # obtain first two rotated coordinates
  tmp<-getRotateData(
		     y1 = y1, 
		     y2 = y2, 
		     projDir = projDir, 
		     iniProjDirMethod = iniProjDirMethod, 
		     projDirMethod = projDirMethod,
                     alpha = alpha, 
		     ITMAX = ITMAX, 
		     eps = eps, 
		     quiet = quiet) 
  Q2<-tmp$Q2 # rotation matrix
  cl<-tmp$cl
  x<-tmp$x

  x1<-x[cl==1,1]
  x2<-x[cl==2,1]
  tmpy1<-x[cl==1,2]
  tmpy2<-x[cl==2,2]

  mx1<-mean(x1, na.rm=TRUE)
  mx2<-mean(x2, na.rm=TRUE)
  if(mx1>mx2)
  { 
    x1<- -x1
    x2<- -x2
    Q2[,1]<- -Q2[,1]
    if(!quiet)
    { cat("Warning: Projected cluster 1 is on the right-hand side of projected cluster 2 along the 1st projection direction!\n")
      cat("'projDir' is replaced by '-projDir'!\n")
    }
  } 
  tmpx<-c(x1, x2)

  my1<-mean(tmpy1, na.rm=TRUE)
  my2<-mean(tmpy2, na.rm=TRUE)
  if(my1>my2)
  { 
    tmpy1<- -tmpy1
    tmpy2<- -tmpy2
    Q2[,1]<- -Q2[,1]
    if(!quiet)
    { cat("Warning: Projected cluster 1 is on the right-hand side of projected cluster 2 along the 1st projection direction!\n")
     cat("'projDir' is replaced by '-projDir'!\n")
    }
  } 
  tmpy<-c(tmpy1, tmpy2)

  xx1<-x1
  xx2<-x2
  yy1<-tmpy1
  yy2<-tmpy2
  n1<-length(xx1)
  n2<-length(xx2)

  if(sepValMethod=="quantile")
  { if(n1>1) 
    { Lx1<-quantile(xx1, prob=alpha/2, na.rm=TRUE)
      Ux1<-quantile(xx1, prob=1-alpha/2, na.rm=TRUE)
      Ly1<-quantile(yy1, prob=alpha/2, na.rm=TRUE)
      Uy1<-quantile(yy1, prob=1-alpha/2, na.rm=TRUE)
    } 
    else 
    { Lx1<-xx1
      Ux1<-xx1
      Ly1<-yy1
      Uy1<-yy1
    }
    if(n2>1)
    { Lx2<-quantile(xx2, prob=alpha/2, na.rm=TRUE)
      Ux2<-quantile(xx2, prob=1-alpha/2, na.rm=TRUE)
      Ly2<-quantile(yy2, prob=alpha/2, na.rm=TRUE)
      Uy2<-quantile(yy2, prob=1-alpha/2, na.rm=TRUE)
    } 
    else 
    { Lx2<-xx2
      Ux2<-xx2
      Ly2<-yy2
      Uy2<-yy2
    }
  } 
  else 
  { za<-qnorm(1-alpha/2)
    if(n1>1)
    { mx1<-mean(xx1, na.rm=TRUE)
      sdx1<-sd(c(xx1), na.rm=TRUE)
      my1<-mean(yy1, na.rm=TRUE)
      sdy1<-sd(c(yy1), na.rm=TRUE)
    } 
    else 
    { mx1<-xx1
      sdx1<-0
      my1<-yy1
      sdy1<-0
    }
    if(n2>1)
    { mx2<-mean(xx2, na.rm=TRUE)
      sdx2<-sd(c(xx2), na.rm=TRUE)
      my2<-mean(yy2, na.rm=TRUE)
      sdy2<-sd(c(yy2), na.rm=TRUE)
    } 
    else 
    { mx2<-xx2
      sdx2<-0
      my2<-yy2
      sdy2<-0
    }
    Lx1<-mx1-za*sdx1
    Ux1<-mx1+za*sdx1
    Lx2<-mx2-za*sdx2
    Ux2<-mx2+za*sdx2
    Ly1<-my1-za*sdy1
    Uy1<-my1+za*sdy1
    Ly2<-my2-za*sdy2
    Uy2<-my2+za*sdy2
  }

  numerx<-(Lx2-Ux1)
  denomx<-(Ux2-Lx1)
  numery<-(Ly2-Uy1)
  denomy<-(Uy2-Ly1)
  if(abs(denomx)<eps) # denominator equal to zero
  { sepValx<- -1 } 
  else { 
    sepValx<-numerx/denomx 
  }
  if(abs(denomy)<eps) # denominator equal to zero
  { sepValy<- -1 } 
  else { sepValy<-numery/denomy }
  
  xlab<-paste(xlab," (sepValx=",round(sepValx,2),", alpha=", round(alpha,2), ")",sep="")
  ylab<-paste(ylab," (sepValy=",round(sepValy,2),", alpha=", round(alpha,2), ")",sep="")

  if(missing(xlim)) { xlim<-range(tmpx, na.rm = TRUE) } 
  else { xlim<-xlim }
  if(missing(ylim)) { ylim<-range(tmpy, na.rm = TRUE) } 
  else { ylim<-ylim }

  xlim[1]<-min(xlim[1], Lx1, Lx2, na.rm = TRUE)
  xlim[2]<-max(xlim[2], Ux1, Ux2, na.rm = TRUE)
  ylim[1]<-min(ylim[1], Ly1, Ly2, na.rm = TRUE)
  ylim[2]<-max(ylim[2], Uy1, Uy2, na.rm = TRUE)

  plotCluster(
	      nn1 = n1, 
	      nn2 = n2, 
	      tmpx1 = xx1, 
	      tmpy1 = yy1, 
	      tmpx2 = xx2, 
	      tmpy2 = yy2, 
              xlim = xlim, 
	      ylim = ylim, 
	      xlab = xlab, 
	      ylab = ylab, 
	      title = title, 
              font = font, 
	      font.lab = font.lab, 
	      cex = cex, 
	      cex.lab = cex.lab, 
	      cex.main = cex.main, 
	      lwd = lwd, 
              lty1 = lty1, 
	      lty2 = lty2, 
	      pch1 = pch1, 
	      pch2 = pch2, 
	      col1 = col1, 
	      col2 = col2, 
	      type="p")

  plotTickLabel(
		x1 = x1,
		x2 = x2,
		L1 = Lx1,
		U1 = Ux1,
		L2 = Lx2,
		U2 = Ux2, 
		axis=1, 
		font = font.lab, 
                lwd = lwd, 
		lty1 = lty1, 
		lty2 = lty2, 
		col1 = col1, 
		col2 = col2) 
  plotTickLabel(
		x1 = tmpy1,
		x2 = tmpy2,
		L1 = Ly1,
		U1 = Uy1,
		L2 = Ly2,
		U2 = Uy2, 
		axis=2, 
		font = font.lab, 
                lwd = lwd, 
		lty1 = lty1 , 
		lty2 = lty2, 
		col1 = col1, 
		col2 = col2) 

  invisible(list(sepValx=sepValx, sepValy=sepValy, Q2=Q2))

}

# Obtain first two coordinates of the rotated data sets
# y1, y2 -- clusters 1 and 2
# projDir -- projection direction
# alpha, ITMAX, eps, quiet -- same as other functions
getRotateData<-function(
			y1, 
			y2, 
			projDir, 
                        iniProjDirMethod = c("SL", "naive"), 
                        projDirMethod = c("newton", "fixedpoint"), 
                        alpha = 0.05, 
			ITMAX = 10, 
                        eps = 1.0e-10, 
			quiet = TRUE)
{ 
  iniProjDirMethod<-match.arg(arg=iniProjDirMethod, choices=c("SL", "naive"))
  projDirMethod<-match.arg(arg=projDirMethod, choices=c("newton", "fixedpoint"))
  if(is.matrix(y1)==TRUE) { n1<-nrow(y1) } 
  else { n1<-1 }
  if(is.matrix(y2)==TRUE) { n2<-nrow(y2) } 
  else { n2<-1 }
  if(n1==1) 
  { mu1<-as.vector(y1)
    p<-length(y1) 
  } 
  else 
  { mu1<-apply(y1, 2, mean, na.rm=TRUE) 
    p<-ncol(y1)
  }
  if(n2==1) { mu2<-as.vector(y2) } 
  else { mu2<-apply(y2, 2, mean, na.rm=TRUE) }
  if(n1==1 || n2==1)
  { cl<-rep(1:2, c(n1, n2))
    y1<-matrix(y1, nrow=n1, ncol=p)
    y2<-matrix(y2, nrow=n2, ncol=p)
    y<-rbind(y1,y2)
    out.label<-NULL
  }
  else  
  { y<-as.matrix(rbind(y1, y2)); cl<-rep(1:2, c(n1, n2)); }
  p<-ncol(y)
  if(p<2)
  { stop("Dimension should be greater than 2!\n") }
  # construct an orthogonal matrix whose first column is projDir 
  Q<-MOrthogonal(M = projDir)
  # rotate data
  ry<-y%*%Q
  if(p>2)
  { ry1<-ry[,-1] # delete the first dimension
    # we will find a optimal direction in the rest of dimension.
    # the obtained optimal direction will orthogonal with "projDir"
    if(n1>1){tmpy1<-ry1[cl==1,]}else{tmpy1<-matrix(ry1,nrow=1,ncol=p-1)}
    if(n2>1){tmpy2<-ry1[cl==1,]}else{tmpy2<-matrix(ry1,nrow=1,ncol=p-1)}

    tmp<-projDirData(
		     y1 = tmpy1, 
		     y2 = tmpy2, 
		     iniProjDirMethod = iniProjDirMethod, 
		     projDirMethod = projDirMethod, 
                     alpha = alpha, 
		     ITMAX = ITMAX, 
		     eps = eps, 
		     quiet = quiet)
    aa<-c(0, tmp$projDir) 

    aa<-as.vector(Q%*%aa) # aa^T a=0
    Q2<-cbind(Q[,1], aa) # data will project into these two dimension
  } 
  else { Q2<-Q }
  x<-y%*%Q2

  return(list(x=x,cl=cl,Q2=Q2))
}


# Visualize clusters by projecting them into a 2-dimensional space.
# The 2-dimensional space is spanned by the first 2 eigenvectors of the
# matrix B:
#\begin{eqnarray*}
# B&=&{2\over
# k_0(k_0-1)}\sum_{i=1}^{k_0-1}\sum_{j=i+1}^{k_0}\E\left[(\Y_i-\Y_j)
# (\Y_i-\Y_j)^T\right]\\
# &=&{2\over k_0}\sum_{i=1}^{k_0}\Sigma_i+{2\over
# k_0(k_0-1)}\sum_{i<j}\left(\thbf_i-\thbf_j\right)
# \left(\thbf_i-\thbf_j\right)^T,
#\end{eqnarray*}
#
# y -- nxp data matrix
# cl -- a partition of the n data points in y
# outlierLabel -- integer (default 0) for detected outliers
# projMethod -- indicates that we use our Eigen method "Eigen" or 
#    method DMS of Dhillon, Modha, Spangler (2002) CSDA v 41, pp 59-90.
# title -- the title of the plot.
viewClusters<-function(
		       y, 
		       cl, 
		       outlierLabel = 0,
                       projMethod = "Eigen", 
		       xlim = NULL, 
		       ylim = NULL,
                       xlab = "1st projection direction", 
		       ylab = "2nd projection direction", 
                       title = "Scatter plot of 2-D Projected Clusters",
                       font = 2, 
		       font.lab = 2, 
		       cex = 1.2, 
		       cex.lab = 1.2) 
{ 
  projMethod<-match.arg(projMethod, choices=c("Eigen", "DMS"))

  y<-as.matrix(y)
  p<-ncol(y);
  y2<-y
  cl2<-cl

  # remove outliers
  out.label<-which(cl==outlierLabel)
  if(length(out.label)>0) 
  { cl2<-cl[-out.label]
    y2<-y[-out.label,,drop=FALSE]
  }

  cl2.u<-unique(cl2)
  k0<-length(cl2.u)

  # project data to the 2-dimensional space spanned by the first 2 
  # eigenvectors of the between cluster distance matrix B.
  if(projMethod=="DMS") { tmp<-DMSProj(
				       y = y2, 
				       cl = cl2, 
				       outlierLabel = outlierLabel) }
  else { tmp<-eigenProj(
			y = y2, 
			cl = cl2, 
			outlierLabel = outlierLabel) }
  #ev<-tmp$ev # eigenvalues of the matrix B
  Q<-tmp$Q # the all eigenvectors of B
  B<-tmp$B # the between distance matrix B

  # we only plot the first 2 dimension, using all data points
  # including outliers
  proj<-y%*%Q[,1:2]
 
  if(missing(xlim)) { xlim<-range(proj[,1]) } 
  else { xlim<-xlim }
  if(missing(ylim)) { ylim<-range(proj[,2]) } 
  else { ylim<-ylim }

  # plot projected clusters
  plot(proj[cl==cl2.u[1], 1], proj[cl==cl2.u[1], 2], xlim=xlim, ylim=ylim,
    xlab=xlab,ylab=ylab,col=1, pch=1,font=font,cex=cex,
    font.lab=font.lab,cex.lab=cex.lab)
  # plot outliers
  if(length(out.label)>0)
  { points(proj[cl==outlierLabel,1], proj[cl==outlierLabel,2], col=2, pch=2) }

  title(title,cex.main=1.5); 
  start<-1
  # plot the data points in the remaining clusters
  if(k0>1)
  { for(i in 2:k0)
    { points(proj[cl==cl2.u[i],1], proj[cl==cl2.u[i],2], col=start+i,pch=start+i) }
  }

  res<-list(B=B, Q=Q, proj=proj)
  invisible(res)
}

# Eigen approach to find the projection directions to visualize clusters.
# Project data to the 2-dimensional space spanned by the first two 
# eigenvectors of the between cluster distance matrix B
# B=(2/ k0)*sum_{i=1}^{k_0}Sigmai
#  +(2/(k_0(k_0-1))sum_{i<j}(thetai-thetaj)(thetai-thetaj)^T, 
# where thetai and Sigmai 
# are the mean vector and covariance matrix for i-th cluster.
#
# y -- nxp data points
# cl -- a partition of the n data points in y
# outlierLabel -- integer (default 0) for detected outliers
eigenProj<-function(
		    y,
		    cl, 
		    outlierLabel = 0)
{ y<-as.matrix(y); p<-ncol(y);
  # remove outliers
  out.label<-which(cl==outlierLabel)
  if(length(out.label)>0) { cl2<-cl[-out.label]; y2<-y[-out.label,]; }
  else { cl2<-cl; y2<-y; }
  cl.set<-sort(unique(cl2))
  k0<-length(cl.set)

  # obtain cluster centers and covariance matrices
  s<-array(0,c(p,p,k0))
  mu.mat<-matrix(0, nrow=k0, ncol=p)
  # W = sum_{i=1}^{k0} Sigma_i /k0
  W<-matrix(0,nrow=p, ncol=p)
  for(i in 1:k0)
  { yi<-y2[cl==cl.set[i],]
    mu.mat[i,]<-apply(yi, 2, mean, na.rm=TRUE)
    s[,,i]<-cov(yi)
    W<-W+s[,,i]
  }
  W<-W/k0

  if(k0>1)
  { # B=sum_{i=1}^{k0-1}sum_{j=(i+1)}^{k0}
    # (theta_i-theta_j)*(theta_i-theta_j)^T / (k0*(k0-1))
    B<-matrix(0, nrow=p, ncol=p)
    for(i in 1:(k0-1))
    { mui<-mu.mat[i,]
      for(j in (i+1):k0)
      { muj<-mu.mat[j,]
        B<-B+outer(mui-muj, mui-muj)
      }
    }
    B<-W+(B/(k0*(k0-1)))
  } else { B<-W }
  B<-2*B

  eg<-eigen(B, symmetric=TRUE)
  Q<-eg$vectors
  # The i-th column of Q is the eigenvector of B
  # corresponding to the i-th eigenvalue
  res<-list(Q=Q, B=B)
  return(res)
}

# Dhillon et al.'s (2002) approach to find the projection directions to 
# visualize clusters.
#  Dhillon I. S., Modha, D. S. and Spangler, W. S. (2002)
#  Class visualization of high-dimensional data with applications.
#  \emph{computational Statistics and Data Analysis}, \bold{41}, 59--90.
# 
# project data to the t-dimensional space spanned by the first two eigen
# vectors of the between cluster distance matrix B
# B=\sum_{i=2}^{k_0}\sum_{j=1}^{i-1}
#   n_in_j(\theta_i-\theta_j)(\theta_i-\theta_j)^T
#
# y -- nxp data points
# cl -- a partition of the n data points in y
# outlierLabel -- integer (default 0) for detected outliers
DMSProj<-function(
		  y, 
		  cl, 
		  outlierLabel = 0)
{ y<-as.matrix(y)
  p<-ncol(y);
  # remove outliers
  out.label<-which(cl==outlierLabel)
  if(length(out.label)>0) { cl2<-cl[-out.label]; y2<-y[-out.label,]; }
  else { cl2<-cl; y2<-y; }
  cl.set<-sort(unique(cl2))
  k0<-length(cl.set)
  n.set<-tapply(rep(1,length(cl2)), cl2, sum, na.rm=TRUE)

  # obtain cluster centers and covariance matrices
  mu.mat<-matrix(0, nrow=k0, ncol=p)
  for(i in 1:k0)
  { yi<-y2[cl==cl.set[i],]
    if(n.set[i]>1)
    { mu.mat[i,]<-apply(yi, 2, mean, na.rm=TRUE) }
    else { mu.mat[i,]<-yi }
  }

  if(k0>1)
  { # B=\sum_{i=2}^{k_0}\sum_{j=1}^{i-1}
    # n_in_j(\theta_i-\theta_j)(\theta_i-\theta_j)^T
    B<-matrix(0, nrow=p, ncol=p)
    for(i in 2:k0)
    { mui<-as.vector(mu.mat[i,])
      for(j in 1:(i-1))
      { muj<-as.vector(mu.mat[j,])
        ni<-n.set[i]; nj<-n.set[j];
        tem1<-(mui-muj)
        tem2<-tem1%*%t(tem1)
        B<-B+n.set[i]*n.set[j]*tem2
      }
    }
  }
  else { stop("the number of clusters is 0!\n") }
  eg<-eigen(B, symmetric=TRUE)
  Q<-eg$vectors
  # The i-th column of Q is the eigenvector of B
  # corresponding to the i-th eigenvalue
  res<-list(Q=Q, B=B)
  return(res)
}
Any scripts or data that you put into this service are public.
clusterGeneration documentation built on Aug. 16, 2023, 9:07 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
clusterGeneration
Random Cluster Generation (with Specified Degree of Separation)

R/visual.R
In clusterGeneration: Random Cluster Generation (with Specified Degree of Separation)

Defines functions DMSProj eigenProj viewClusters getRotateData plot2DProjection plotTickLabel plotCluster plot1DProjection

Documented in plot1DProjection plot2DProjection viewClusters

Try the clusterGeneration package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

clusterGeneration Random Cluster Generation (with Specified Degree of Separation)

R/visual.R In clusterGeneration: Random Cluster Generation (with Specified Degree of Separation)

Defines functions DMSProj eigenProj viewClusters getRotateData plot2DProjection plotTickLabel plotCluster plot1DProjection

Documented in plot1DProjection plot2DProjection viewClusters

Try the clusterGeneration package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

clusterGeneration
Random Cluster Generation (with Specified Degree of Separation)

R/visual.R
In clusterGeneration: Random Cluster Generation (with Specified Degree of Separation)