R/PAMfmado.R

Defines functions PAMfmado

Documented in PAMfmado

PAMfmado <- function(x,K,J=0,threshold=.99,max.min=0){
  #
  # Description 
  #   This function performs the PAM algorithm based on the F-madogram distance
  #
  # Aguments 
  #   x a matrix
  #   K number of clusters
  #   J number of resampling for which the stations are randomly moved to break the dependence 
  #                    J=0 means no resampling
  #   threshold corresponding to the quantile level for the resampling.
  #   the resulting quantile is stored via sil.final<<-quantile(sil.vec[sil.vec>0],probs=threshold)
  #                    J=0 means no resampling
  # 
  Nnb = ncol(x) 
  Tnb = nrow(x)
  
  #--- DISTANCE MATRIX
  #--- F-MADOGRAM  
  V = array(NaN, dim = c(Tnb,Nnb))
  for(p in 1:Nnb) {
    x.vec = as.vector(x[,p])
    x.vec[x.vec < max.min]=NA # thresholding 
    Femp = ecdf(x.vec)(x.vec)
    V[,p] = Femp
  }
  # with Madogram
  DD = dist(t(V),method = "manhattan",diag = TRUE, upper = TRUE)/(2*Tnb)
  # !!! NA are excluded for when computing the distance between two rows but the dist is then multiplied by (Tnb/ nb of non-NA) 
  # in order to have comparable distances (ie wrt the number of data points) among all rows 
  
  #---------- CLUSTERING WITH PAM -------#
  output = pam(DD,K,diss = TRUE,medoids = NULL)
  
  #################################################################################
  ####  resampling step to compute the silhouette coeff for independent case
  #################################################################################
  if(J>0){
    sample.R = function(x) sample(x)
    sil.vec=rep(0,J)
    for(j in 1:J){
      x.sample=apply(x,2,sample.R) # resampling within each column 
      for(p in 1:Nnb) {
        x.bis = as.vector(x.sample[,p])
        x.bis[x.bis < max.min]=NA # thresholding 
        Femp = ecdf(x.bis)(x.bis)
        V[,p] = Femp
      }
      # with Madogram
      DD = dist(t(V),method = "manhattan",diag = TRUE, upper = TRUE)/(2*Tnb)
      sil.vec[j]=pam(DD,K,diss = TRUE,medoids = NULL,keep.diss=FALSE,keep.data=FALSE)$silinfo$avg.width
    }
    sil.final = quantile(sil.vec[sil.vec>0],probs=threshold)
    print("The quantile for resampling is ", sil.final, "\n", sep="")
  }
  
  return(output)
}

Try the ExtremalDep package in your browser

Any scripts or data that you put into this service are public.

ExtremalDep documentation built on April 4, 2025, 1:44 a.m.