R/perioclust.R
In SPARTAAS: Statistical Pattern Recognition and daTing using Archaeological Artefacts assemblageS

Documented in adjacency CAdist hclustcompro hclustcompro_select_alpha hclustcompro_subdivide overlap seriograph

CAdist <- function(df, nPC = NULL, graph = TRUE){

  #===============================================================================
  # GENERATE DISTANCE MATRIX WITH CORRESPONDENCE ANALYSiS (CA) (Fr: Analyse Factorielle des Correspondances)
  #
  # Authors : A.COULON
  # Last update : 23 october 2018
  #
  # Arguments:
  # df	      The ceramic contingence table
  # nPC       The number of principal component to keep
  # graph     Logical for print or not the plot of the CA
  #
  #===============================================================================

  if(!is.table(df)){
    if(is.matrix(df)){df <- as.data.frame(df)}
    if(!is.data.frame(df)){stop("df is not a data frame.")}
  }

  if(FALSE %in% (df == trunc(df))){stop("The data frame must contain integers.")}
  max <- min( nrow(df)-1,ncol(df)-1)

  if(!is.null(nPC)){
    if(nPC == "max"){nPC <- max}
    if(nPC > max){
      message("nPC must be less than: ",max+1,".")
      nPC <- max
    }
  }else{
    tmp <- FactoMineR::CA(df, graph=graph)

    x <- paste0(rownames(tmp$eig)," (",arrondi(tmp$eig[,3],1),"%)")
    Eigen_value <- tmp$eig[,1]
    df2 <- data.frame(x,Eigen_value)
    df2 <- within(df2,Principal_Component <- factor(x,levels = x))

    p <- plotly::plot_ly(df2,
      x = ~Principal_Component,
      y = ~Eigen_value,
      type = "bar"
    )
    print(p)
    if(interactive()){
      OK <- TRUE
      while(OK){
        nPC <- as.integer(readline("Select the number of axes:"))
        if(is.na(nPC) || nPC < 1 || nPC > max){
          message("must be a integer between 1 and ",max)
        }else{
          OK <- FALSE
        }
      }
    }else{
      stop("Need a nPC value.")
    }

  }
  df.CA <- FactoMineR::CA(df, ncp = nPC, graph = graph)
  dist <- dist(df.CA$row$coord) / max(dist(df.CA$row$coord))
  #dist <- dist(scale(df.CA$row$coord)) / max(dist(scale(df.CA$row$coord)))
  return(D=as.matrix(dist))
}

adjacency <- function(network){

  #===============================================================================
  # GENERATE DISTANCE MATRIX WITH NETWORK
  #
  # Authors : A.COULON
  # Last update : 23 october 2018
  #
  # Arguments:
  # raw	      The data frame with the stratigraphic element and the relation with the other
  #
  #===============================================================================

  #data.frame ?
  if(!is.data.frame(network)){stop("network data must be a data.frame.")}
  #number of col  ?
  if(length(network[1,])!=2){stop("network must have 2 colunms.")}
  M <- matrix(0,ncol = length(network[,1]),nrow = length(network[,1]),dimnames = list(network[,1],network[,1]))
  diag(M) <- 1
  for(i in 1:length(network[,1])){
    rel <- as.character(network[i,2])
    rel <- strsplit(rel,",")
    for(elt in rel[[1]]){
      j <- which(network[,1] == elt)
      M[i,j] <- 1
      M[j,i] <- 1
    }
  }
  D <- 1 - M
  return(D)
}

overlap <- function(temporal){

  #===============================================================================
  # GENERATE DISSIMILARITY MATRIX WITH TEMPORAL
  #
  # Authors : A.COULON
  # Last update : 23 october 2018
  #
  # Arguments:
  # temporal	      The data frame
  #
  #===============================================================================

  indice <- function(inf1,sup1,inf2,sup2){
    if(inf2 == inf1 && inf1 == sup2 && inf1 == sup1){
      return(1)
    }
    if(sup1 < inf1 || inf2 > sup2){
      return(99)
    }
    #we define the bound of the total overlap as follow : min of lower bounds and max of upper bounds
    total_overlap_inf <- min(inf1,inf2)
    total_overlap_sup <- max(sup1,sup2)

    #The overlap is the difference between the bounds
    total_overlap <- total_overlap_sup - total_overlap_inf

    #we define the bound of the within overlap as follow : max of lower bounds and min of upper bounds
    within_overlap_inf <- max(inf1,inf2)
    within_overlap_sup <- min(sup1,sup2)

    #The overlap is the difference between the bounds
    within_overlap <- within_overlap_sup - within_overlap_inf

    #the overlap index is the ratio between the within overlap and the total overlap
    i <- within_overlap / total_overlap
    return(i)
  }

  df <- as.data.frame(temporal)
  #we create a square matrix of the same size as the number of rows
  overlap_matrix <- matrix(nrow = length(df[,1]), ncol = length(df[,1]))

  #we look each couple i j and compute the overlap index for this couple and save the value in the matrix[i,j]
  for(i in 1:(length(df[,1])-1)){
    for(j in (i+1):length(df[,1])){
      #recuperation of the elements rows i and j
      ens1 <- df[i,]
      ens2 <- df[j,]
      #compute the overlap index
      ind <- indice(ens1[1,2],ens1[1,3],ens2[1,2],ens2[1,3])
      #if(ind < 0){ind <- 0}
      if(ind==99){stop("The upper and lower bound are not in the correct column (sup1 < inf1 || inf2 > sup2).")}
      #if(ind <= 0){ind <- 0}
      #if(ind > 0){ind <- 1}
      #fill the matrix
      overlap_matrix[i,j] <- overlap_matrix[j,i] <- ind
    }
  }

  #fill the diag with 1
  diag(overlap_matrix) <- 1
  #[-1,1] to [0,2] (2 mean identical time range)
  overlap_matrix <- overlap_matrix + 1
  #inversion of values (high values now mean distant elements)
  overlap_matrix <- 2 - overlap_matrix
  #[0,2] to [0,1]
  overlap_matrix <- overlap_matrix / 2
  rownames(overlap_matrix) <- colnames(overlap_matrix) <- df[,1]
  return(D=overlap_matrix)
}

perioclust <- hclustcompro <- function(D1,D2,alpha = "EstimateAlphaForMe",k = NULL,title = "notitle",method = "ward.D2",suppl_plot = TRUE) {
  #===============================================================================
  # CAH CLASSIFICATION USING TWO DIST MATRIX
  #
  # Authors : A.COULON, L.BELLANGER and P.HUSI
  # Last update : 23 october 2018
  #
  # Arguments:
  # D1	       The fisrt distance matrix. Archeological context: Stratigraphic or timerange distance.
  # D2	       The second distance matrix. Archeological context: ceramic distance.
  # alpha	     The mixing parameter
  # k          The number of clusters
  # title	     The title to print on the dendrogram. (optionnal)
  # method     The method to use in hclust (see doc)
  # suppl_plot Logical for plot the WSS and average sil plot.
  #
  #===============================================================================

  sort = TRUE
  if(is.null(labels(D1))){
    sort <- FALSE
  }
  if(is.null(labels(D2))){
    sort <- FALSE
  }

  METHODS <- c("ward.D","single","complete","average","mcquitty","median","centroid","ward.D2")
  if (method == "ward") {
    message("The \"ward\" method has been renamed to \"ward.D\"; note new avaible method \"ward.D2\"")
    method <- "ward.D"
  }
  i.meth <- pmatch(method, METHODS)
  if (is.na(i.meth))
    stop("invalid clustering method", paste("", method))
  if (i.meth == -1)
    stop("ambiguous clustering method", paste("", method))

  method <- METHODS[i.meth]
  cont <- NULL
  Number_of_groups <- c()
  y <- c()
  yend <- c()
  xend <- c()
  if(!"dist" %in% class(D2)){
    #matrix ?
    if(!is.matrix(D2)){
      if(length(D2[1,]) == 3){
        message("generation of a temporal dissimilarities matrix.")
        D2 <- overlap(D2)
      }else{
        message("generation of a stratigraphic dissimilarities matrix.")
        D2 <- adjacency(D2)
        #diag = 0 ?
        if(sum(diag(D2)) != 0){
          stop("D2: The diagonal must be equal to 0.")
        }
      }
    }

    #symetric ?
    if(!isSymmetric(D2)){
      stop("D2 is not symmetric.")
    }
    if(!dim(D2)[1] == dim(D2)[2]){stop("D2 not a square matrix.")}
  }

  if(!"dist" %in% class(D1)) {
    #matrix ?
    if(!is.matrix(D1)){
      if(is.data.frame(D1)){cont <- D1;EPPM <- TRUE}else{EPPM <- FALSE}
      message("generation of a ceramic distances matrix.")
      D1 <- as.matrix(CAdist(D1,nPC="max",graph=FALSE))
    }
    #diag = 0 ?
    if(sum(diag(D1)) != 0){
      stop("D1: The diagonal must be equal to 0.")
    }
    #symetric ?
    if(!isSymmetric(D1)){
      stop("D1 is not symmetric.")
    }
    if(!dim(D1)[1] == dim(D1)[2]){stop("D1 not a square matrix.")}
  }

  D1 <- as.matrix(D1)
  max <- dim(D1)[1]
  D2 <- as.matrix(D2)

  if(FALSE %in% (sort(rownames(D1)) == sort(rownames(D2)))){
    warning("The names of D1 and D2 are not the same.")
    sort <- FALSE
  }
  if(FALSE %in% (sort(colnames(D1)) == sort(colnames(D2)))){
    warning("The names of D1 and D2 are not the same.")
    sort <- FALSE
  }
  if(FALSE %in% (sort(rownames(D1)) == sort(colnames(D2)))){
    warning("The names of D1 and D2 are not the same.")
    sort <- FALSE
  }

  if(!length(D1) == length(D2)){stop("D1 and D2 have not the same size.")}
  #dist (positive matrix) ?
  if(min(D1) < 0){stop("D1 is not positive.")}
  if(min(D2) < 0){stop("D2 is not positive.")}

  #no alpha
  if(alpha == "EstimateAlphaForMe"){
    sa <- hclustcompro_select_alpha(D1, D2, method = method, resampling = FALSE)
    alpha <- sa$alpha[1]
    sa <- list(alpha.plot = NULL,conf = NULL)
  }else{
    sa <- list(alpha.plot = NULL,conf = NULL)
  }
  #alpha 0-1 ?
  if(alpha > 1 || alpha < 0){stop("alpha must be between 0 and 1.")}
  #normalization
  if(max(D1) != 0){
    D1 <- D1 / max(D1)
  }
  if(max(D2) != 0){
    D2 <- D2 / max(D2)
  }
  #create mixing matrix
  D1 <- as.dist(D1)
  D2 <- as.dist(D2)
  D1 <- as.matrix(D1)
  D2 <- as.matrix(D2)

  #sort labels
  if (sort) {
    D1.Tmp <- D1[sort(labels(D1)[[1]]),sort(labels(D1)[[1]])]
    D2.Tmp <- D2[sort(labels(D2)[[1]]),sort(labels(D2)[[1]])]
    D1 <- as.dist(D1.Tmp)
    D2 <- as.dist(D2.Tmp)
  }

  #mixt matrix
  D_alpha <- (alpha)*D1 + (1-alpha)*D2
  message("\nThe mixing parameter is ",alpha,". The mixing matrix is made of ",arrondi((alpha)*100,2),"% of D1 and ",
      arrondi((1-alpha)*100,2),"% of D2.\n\n")
  D_alpha <- as.dist(D_alpha)

  #CAH
  tree <- SPARTAAS::hclust(d=D1,method=method,d2=D2,alpha=alpha)

  if(title == "notitle"){
    title <- paste("Dendrogram with alpha =",alpha)
  }
  subtitle <- paste("The mixing matrix is made of ",arrondi((alpha)*100,2),"% of D1 and ",
                    arrondi((1-alpha)*100,2),"% of D2.")

  D <- as.matrix(D_alpha)
  n <- length(D[,1])

  #wss sil value
  if(suppl_plot){
    if(length(D[,1]) > 20){n <- 20}
    wss <- c()
    sil <- c()
    for ( i in 2:(n-1)){
      cutree <- stats::cutree(tree,i)
      res <- fpc::cluster.stats(D,cutree)
      wss <- c(wss,res$within.cluster.ss)
      sil <- c(sil,res$avg.silwidth)
    }
    #wss
    Within_Sum_of_Square <- rbind(seq(2,(n-1)),wss)
    color_wss <- elbow_finder(seq(2,(n-1)),wss)
    dataplot <- t(Within_Sum_of_Square)
    dataplot <- as.data.frame(dataplot)
    colnames(dataplot) <- c("Number_of_groups","Within_Sum_of_Square")
    q <- ggplot2::ggplot(dataplot,aes(x=Number_of_groups, y=Within_Sum_of_Square)) +
      ggplot2::geom_point(aes(color=color_wss)) +
      ggplot2::scale_colour_gradientn(colours=c("brown1","chartreuse3")) +
      ggplot2::geom_line(linetype = 3) +
      ggplot2::scale_x_continuous(breaks = dataplot$Number_of_groups ,
                                  labels = dataplot$Number_of_groups ) +
      ylim(0,max(wss)+1) +
      labs(title = "Within Sum of Square" , subtitle = "Partition evaluation") +
      ggplot2::theme(legend.position="none")

    #silhouette
    Average_Sil_Width <- rbind(seq(2,(n-1)),sil)
    dataplot <- t(Average_Sil_Width)
    dataplot <- as.data.frame(dataplot)
    dataplot <- na.omit(dataplot)
    colnames(dataplot) <- c("Number_of_groups","Average_Sil_Width")
    p <- ggplot2::ggplot(dataplot,aes(x = Number_of_groups, y = Average_Sil_Width)) +
      ggplot2::geom_point(aes(color=Average_Sil_Width)) +
      ggplot2::scale_colour_gradientn(colours=c("brown1","chartreuse3")) +
      ggplot2::geom_line(linetype = 3) + ylim(0,1) +
      ggplot2::scale_x_continuous(breaks = dataplot$Number_of_groups,
                                  labels = dataplot$Number_of_groups) +
      labs(title = "Average Silhouette Width", subtitle = "Partition evaluation") +
      ggplot2::theme(legend.position="none")

    # Nouvelle page
    grid::grid.newpage()
    # Creer la mise en page : nrow = 2, ncol = 2
    grid::pushViewport(viewport(layout = grid.layout(2, 2)))
    # Une fonction pour definir une region dans la mise en page
    define_region <- function(row, col){
      grid::viewport(layout.pos.row = row, layout.pos.col = col)
    }

    # Arranger les graphiques dans la fenetre
    hc <- tree
    hcdata <- ggdendro::dendro_data(tree, type="rectangle")
    dendrogram <- ggplot2::ggplot() + ggplot2::ggtitle(title) + ylim(-0.2, max(tree$height)) + ylab("height") + xlab("") +
      ggplot2::geom_segment(data=segment(hcdata), aes(x=x, y=y, xend=xend, yend=yend)) +
      ggplot2::geom_text(data=label(hcdata), aes(x=x, y=y,label=label,hjust=1,angle=90,fontface="bold",cex=1.5),
                         size=3) +
      ggplot2::theme(axis.title.x=element_blank(),
                     axis.text.x=element_blank(),
                     axis.ticks.x=element_blank())

    print(dendrogram, vp = define_region(1:2, 1))
    print(p, vp = define_region(1, 2))
    print(q, vp = define_region(2, 2))

    if(is.null(k)){
      if(interactive()){
        OK <- TRUE
        while(OK){
          k <- as.integer(readline("Select the number of clusters:"))
          if(is.na(k) || k < 1 || k > max){
            message("must be a integer between 1 and ",max)
          }else{
            OK <- FALSE
          }
        }
      }else{
        stop("No interactive session. Use parameter k to set clusters.")
      }
    }
  }else{
    if(is.null(k)){
      plot(tree,h = -1,main = title,sub = subtitle,xlab = paste0("D_alpha, method: \"",method,"\""))
      if(interactive()){
        OK <- TRUE
        while(OK){
          k <- as.integer(readline("Select the number of clusters:"))
          if(is.na(k) || k < 1 || k > max){
            message("must be a integer between 1 and ",max)
          }else{
            OK <- FALSE
          }
        }
      }else{
        stop("No interactive session. Use parameter k to set clusters.")
      }
    }
  }


  plot(tree,h = -1,main = title,sub = subtitle,xlab = paste0("D_alpha, method: \"",method,"\""))
  stats::rect.hclust(tree, k = k, border = colorspace::rainbow_hcl(k, c = 80, l = 60, start = 0, end = 360 * (k-1) / k))
  cluster <- stats::cutree(tree, k = k)
  height <- tree$height[length(tree$height) - k + 2]
  order <- unique(cluster[tree$order])
  for(i in 1:length(order)){
    if(i != order[i] && i < order[i]){
      cluster[cluster == i] <- 0
      cluster[cluster == order[i]] <- i
      cluster[cluster == 0] <- order[i]
      order <- unique(cluster[tree$order])
    }
  }
  col <- colorspace::rainbow_hcl(length(unique(cluster)),
                     c = 80, l = 60, start = 0,
                     end = 360*(length(unique(cluster))-1)/(length(unique(cluster))))
  for(i in 1:length(unique(cluster))){
    x <- mean(which(cluster[tree$order] == sort(unique(cluster))[i]))

    if(length(unique(cluster)) > 338){
      symb_letter <- sort(unique(cluster))[i]
    }else{
      symb_letter <- Int2Alpha(sort(unique(cluster))[i])
    }
    mtext(symb_letter, side = 1, line = .5, at = x, col = col[i], cex = 1.5)
  }
  cutree <- grDevices::recordPlot()

  return(
    structure(
      list(
        D1 = D1,
        D2 = D2,
        D_alpha = D_alpha,
        alpha = alpha,
        alpha.plot = sa$alpha.plot,
        conf_alpha = sa$conf,
        tree = tree,
        cluster = cluster,
        cutree = cutree,
        call = match.call(),
        cont = cont,
        wss=q,
        avg_sil=p
      ),
      class = c("hclustcompro_cl","list")
    )
  )
}

hclustcompro_subdivide <- function(hclustcompro_cl,cluster,nb_class){

  #test
  if("hclustcompro_cl" != class(hclustcompro_cl)[1]){stop("Not a hclustcompro object.")}
  if(length(cluster) != length(nb_class)){stop("cluster and nb_cluster must have the same length.")}
  title <- paste("Cluster Dendrogram with alpha =",hclustcompro_cl$alpha)
  subtitle <- paste("The mixing matrix is made of ",arrondi((hclustcompro_cl$alpha)*100,2),"% of D1 and ",
                    arrondi((1-hclustcompro_cl$alpha)*100,2),"% of D2.")
  xlab <- paste("D_alpha, method: \"ward.D2\"")
  if(!is.null(hclustcompro_cl$oldcluster)){
    hclustcompro_cl$cluster <- hclustcompro_cl$oldcluster
  }
  k <- length(unique(hclustcompro_cl$cluster))
  color <- colorspace::rainbow_hcl(k, c = 80, l = 60, start = 0,
                       end = 360*(k-1)/(k))
  plot(hclustcompro_cl$tree, h=-1, main = title, sub = subtitle, xlab=xlab)
  stats::rect.hclust(hclustcompro_cl$tree,k = k,border = color)
  clusters <- c()

  cmpt <- 0
  for(i in 1:length(cluster)){
    where <- cluster[i]
    sk <- nb_class[i]
    number <- length(hclustcompro_cl$cluster[hclustcompro_cl$cluster == where])
    if(!sk %in% seq(2,(number-1)) ){stop("You try to create more sub cluster than possible.")}
    #col2 <- rainbow_hcl((k+sk-1), c = 80, l = 60, start = 0, end = 360*((k+sk-1)-1)/(k+sk-1))
    #add
    col <- colorspace::rainbow_hcl(k, c = 80, l = 60, start = 0,
                                end = 360*(k-1)/(k))
    col <- col[where]
    color <- append(color, rep(col,sk-1), after = where+cmpt)
    cmpt <- cmpt + sk-1
    #end
    if(length(hclustcompro_cl$cluster[hclustcompro_cl$cluster == where]) < sk){
      stop("There is not enought element in this class to create ",sk," sub-classes.")
    }
    subdiv <- sub_div(hclustcompro_cl$tree,hclustcompro_cl$cluster,where,sk)
    step <- subdiv$step
    clusters <- fusion(clusters,subdiv$cluster)

    #for color
    nbcluster <- stats::cutree(hclustcompro_cl$tree,h=hclustcompro_cl$tree$height[step])
    order <- unique(nbcluster[hclustcompro_cl$tree$order])
    for(i in 1:length(order)){
      if(i != order[i] && i < order[i]){
        nbcluster[nbcluster == i] <- 0
        nbcluster[nbcluster == order[i]] <- i
        nbcluster[nbcluster == 0] <- order[i]
        order <- unique(nbcluster[hclustcompro_cl$tree$order])
      }
    }

    nb <- which(hclustcompro_cl$cluster == where)
    nb <- nbcluster[nb]
    which <- sort(unique(nb))
    stats::rect.hclust(hclustcompro_cl$tree,h = hclustcompro_cl$tree$height[step+1],which = which,
                border = col)  #col2[seq(where,where+sk-1)])
  }
  #col2 <- rainbow_hcl(length(unique(clusters)), c = 80, l = 60, start = 0,
  #                   end = 360*(length(unique(clusters))-1)/(length(unique(clusters))))
  for(i in 1:length(unique(clusters))){
    col <- color[i]
    x <- mean(which(clusters[hclustcompro_cl$tree$order] == sort(unique(clusters))[i]))
    # col2[i], cex=1.5)
    graphics::mtext(Int2Alpha(sort(unique(clusters))[i]), side = 1, line = .5, at = x, col = col, cex=1.5)
  }
  cutree <- grDevices::recordPlot()
  return(
    structure(
      list(
        D1 = hclustcompro_cl$D1,
        D2 = hclustcompro_cl$D2,
        D_alpha = hclustcompro_cl$D_alpha,
        alpha = hclustcompro_cl$alpha,
        tree = hclustcompro_cl$tree,
        cluster = clusters,
        oldcluster = hclustcompro_cl$cluster,
        cutree = cutree,
        call = match.call(),
        cont = hclustcompro_cl$cont
      ),
      class = c("hclustcompro_cl","list")
    )
  )
}

seriograph <- function(cont, order = NULL, insert = NULL, show = "both", permute = TRUE){

  show <- tolower(show)
  SHOWS <- c("both", "frequency", "eppm")
  i.show <- pmatch(show, SHOWS)
  if (is.na(i.show))
    stop("invalid show method", paste("", show))
  if (i.show == -1)
    stop("ambiguous show method", paste("", show))

  show <- SHOWS[i.show]
  if(show == "eppm"){
    show <- "EPPM"
  }

  if(class(cont)[1] == "hclustcompro_cl"){
    if(is.null(cont$cont)){
      stop("We don't find contingency table in hclustcompro_cl object.")
    }else{
      hclustcompro_cl <- cont
      cont <- hclustcompro_cl$cont
    }
    remove <- Int2Alpha(sort(unique(hclustcompro_cl$cluster)))
    if(is.null(order)){
      order <- Int2Alpha(sort(unique(hclustcompro_cl$cluster)))
    }
    remove <- remove[!remove %in% order]
    if(length(remove) >= 1){
      remove <- Alpha2Int(remove)
    }
    Cluster <- c(0)

    split <- Alpha2Int(order)
    seq <- hclustcompro_cl$cluster
    if(length(remove) >= 1){
      removeseq <- which(seq %in% remove)
      for(i in 1:length(removeseq)){
        seq[removeseq[i]] <- 900 + i
      }
    }

    c <- 0
    d <- 1
    pelt <- 0
    for (i in 1:length(split)) {
      elt <- as.numeric(split[i])
      felt <- as.numeric(split[i+1])
      if(elt != as.integer(elt)){
        if(as.integer(elt) != as.integer(pelt)){
          c <- c + 1
          d <- 1
        }
        seq[hclustcompro_cl$cluster == elt] <- as.numeric(paste0(c,".",d))
        d <- d + 1
      }else{
        c <- c + 1
        seq[hclustcompro_cl$cluster == elt] <- c
        d <- 1
      }
      pelt <- elt
    }
    message("Distribution of elements in the different clusters:\n")

    if(length(remove) >= 1){
      removeseq <- which(seq >= 900)
      seq2 <- seq[-removeseq]
      print(seq2)
    }else{
      print(seq)
    }

    cont <- cont[sort(labels(cont)[[1]]),]
    cont2 <- dplyr::mutate(cont, Cluster = factor(seq))
    cont2 <- dplyr::group_by(cont2, Cluster)
    cont2 <- dplyr::summarise_all(cont2, list(sum=sum))
    cont2 <- as.data.frame(cont2)
    rownames(cont2) <- cont2[,1]
    cont2 <- cont2[,-1]
    if(length(remove) >= 1){
      remove <- which(rownames(cont2) >= 900)
      cont2 <- cont2[-remove,]
    }
    cont2 <- cont2[order(as.numeric(rownames(cont2)),decreasing = T),]
    rownames(cont2) <- order

    if(!is.null(insert)){
      #insert hiatus
      ignore <- FALSE
      if(is.list(insert)){
        if(!is.numeric(insert[[1]])){
          if(!is.numeric(insert[[2]])){
            warning("Insert must contain integers corresponding to the position where to insert Hiatus.")
            ignore <- TRUE
          }else{
            tmp <- insert[[1]]
            insert[[1]] <- insert[[2]]
            insert[[2]] <- tmp
          }
        }
        if(!ignore){
          if(FALSE %in% (insert[[1]] == trunc(insert[[1]])) ){
            warning("Insert must contain integers corresponding to the position where to insert Hiatus.")
            ignore <- TRUE
          }else{
            if(!is.character(insert[[2]])){
              warning("Insert must contain labels as a character vector in the list.")
              ignore <- TRUE
            }
          }
        }
        if(!ignore){
          insert[[1]] <- length(cont2[,1]) - insert[[1]]
          label <- make.names(insert[[2]],unique=TRUE)
          for (i in length(insert[[1]]):1){
            after <- sort(insert[[1]])[i]
            if(after < length(cont2[,1]) && after >= 1 && after == trunc(after)){
              cont2 <- rbind(cont2[1:after,], label = rep(0,length(cont2[1,])),
                             cont2[(after+1):length(cont2[,1]),])
              row.names(cont2)[after+1] <- label[i]
            }else{
              warning("Some positions are outside the range. Ex: ",(length(cont2[,1]) - after))
            }
          }
        }
      }else{
        if(is.vector(insert)){
          if(!is.numeric(insert)){
            warning("Insert must contain integers corresponding to the position where to insert Hiatus.")
            ignore <- TRUE
          }
          if(!ignore){
            if(FALSE %in% (insert[[1]] == trunc(insert[[1]])) ){
              warning("Insert must contain integers corresponding to the position where to insert Hiatus.")
              ignore <- TRUE
            }
          }
          if(!ignore){
            insert <- length(cont2[,1]) - insert
            label <- "Hiatus"
            insert <- sort(insert)
            for (i in length(insert):1){
              if(insert[i] < length(cont2[,1]) && insert[i] >= 1 && insert[i] == trunc(insert[i])){
                cont2 <- rbind(cont2[1:insert[i],], label = rep(0,length(cont2[1,])),
                               cont2[(insert[i]+1):length(cont2[,1]),])
                if(length(insert)== 1){
                  row.names(cont2)[insert[i]+1] <- label
                }else{
                  row.names(cont2)[insert[i]+1] <- paste0(label, ".", (length(insert) + 1 - i) )
                }
              }else{
                warning("Some positions are outside the range. Ex: ",(length(cont2[,1]) - insert[i]))
              }
            }
          }
        }else{
          stop("insert must be a list of two vector (position after which to insert and label)
               or a vector (position)")
        }
      }
    }
    res <- plot_EPPM(cont2,show,permute)
    return(
      structure(
        list(
          seriograph = res$seriograph,
          contingency = res$contingency,
          frequency = res$frequency,
          ecart = res$ecart
        ),
        class = "seriograph"
      )
    )

  }else{
    #test data.frame
    if(!is.data.frame(cont)){
      cont <- as.data.frame(cont)
      if(!is.data.frame(cont)){stop("cont must be a data frame.")}
      if(!FALSE %in% (cont == trunc(cont))){stop("A contingency table must contains integers values.")}
    }
    cont2 <- cont

    #order
    if(!is.null(order)){
      new_order <- rep(NA,length(order))

      for(i in 1:length(order)){
        new_order[i] <- which(labels(cont2)[[1]] == order[i])
      }
      cont2 <- cont2[new_order,]

      #for (i in 1:length(order)){
      #  order[i] <- which(rownames(cont2) == order[i])
      #}
      #cont2 <- cont2[order(as.numeric(order),decreasing = F),]
    }

    if(!is.null(insert)){
      #insert hiatus
      ignore <- FALSE
      if(is.list(insert)){
        if(!is.numeric(insert[[1]])){
          if(!is.numeric(insert[[2]])){
            warning("Insert must contain integers corresponding to the position where to insert Hiatus.")
            ignore <- TRUE
          }else{
            tmp <- insert[[1]]
            insert[[1]] <- insert[[2]]
            insert[[2]] <- tmp
          }
        }
        if(!ignore){
          if(FALSE %in% (insert[[1]] == trunc(insert[[1]])) ){
            warning("Insert must contain integers corresponding to the position where to insert Hiatus.")
            ignore <- TRUE
          }else{
            if(!is.character(insert[[2]])){
              warning("Insert must contain labels as a character vector in the list.")
              ignore <- TRUE
            }
          }
        }
        if(!ignore){
          insert[[1]] <- length(cont2[,1]) - insert[[1]]
          label <- make.names(insert[[2]],unique=TRUE)
          for (i in length(insert[[1]]):1){
            after <- sort(insert[[1]])[i]
            if(after < length(cont2[,1]) && after >= 1 && after == trunc(after)){
              cont2 <- rbind(cont2[1:after,], label = rep(0,length(cont2[1,])),
                             cont2[(after+1):length(cont2[,1]),])
              row.names(cont2)[after+1] <- label[i]
            }else{
              warning("Some positions are outside the range. Ex: ",(length(cont2[,1]) - after))
            }
          }
        }
      }else{
        if(is.vector(insert)){
          if(!is.numeric(insert)){
            warning("Insert must contain integers corresponding to the position where to insert Hiatus.")
            ignore <- TRUE
          }
          if(!ignore){
            if(FALSE %in% (insert[[1]] == trunc(insert[[1]])) ){
              warning("Insert must contain integers corresponding to the position where to insert Hiatus.")
              ignore <- TRUE
            }
          }
          if(!ignore){
            insert <- length(cont2[,1]) - insert
            label <- "Hiatus"
            insert <- sort(insert)
            for (i in length(insert):1){
              if(insert[i] < length(cont2[,1]) && insert[i] >= 1 && insert[i] == trunc(insert[i])){
                cont2 <- rbind(cont2[1:insert[i],], label = rep(0,length(cont2[1,])),
                               cont2[(insert[i]+1):length(cont2[,1]),])
                if(length(insert)== 1){
                  row.names(cont2)[insert[i]+1] <- label
                }else{
                  row.names(cont2)[insert[i]+1] <- paste0(label,".",(length(insert)+1 - i))
                }
              }else{
                warning("Some positions are outside the range. Ex: ",(length(cont2[,1]) - insert[i]))
              }
            }
          }
        }else{
          stop("insert must be a list of two vector (position after which to insert and label)
               or a vector (position)")
        }
      }
    }
    res <- plot_EPPM(cont2,show,permute)
    return(
      structure(
        list(
          seriograph = res$seriograph,
          contingency = res$contingency,
          frequency = res$frequency,
          ecart = res$ecart
        ),
        class = "seriograph"
      )
    )
  }
}


print.hclustcompro_cl <- function(x, ...){

  cat("     hclustcompro | hclustcompro_cl

Alpha:",x$alpha,"

call: ")
  print(x$call)
  cat("
value:
      .. $D1          The first dissimilarities matrix
      .. $D2          The second dissimilarities matrix
      .. $D_alpha     The mixing dissimilarities matrix
      .. $alpha       The mixing parameter
      .. $tree        An object of class hclust (see ?hclust)

If you ran hclustcompro like this: classif <- hclustcompro(...)
select another number of cluster on the plot:
> plot(classif$tree, h = -1)
> rect.hclust(classif$tree, k = k) #with k the number of cluster

documentation:
> ?hclustcompro
      ")
}

hclustcompro_detail_resampling <- function(D1, D2 = NULL, acc = 2, method = "ward.D2", iter = 5){

  #===============================================================================
  # CAH CLASSIFICATION USING TWO DIST MATRIX
  #
  # Authors : A.COULON, L.BELLANGER and P.HUSI
  # Last update : 23 october 2018
  #
  # Arguments:
  # D1	                  The fisrt distance matrix. Archeological context: stratigraphic distance.
  # D2	                  The second distance matrix. Archeological context: ceramic distance.
  # acc	                  The accuracy to round.
  # method                The method to use in hclust (see doc)
  # iter                   The number of replication for each element
  #
  #===============================================================================
  METHODS <- c("ward.D", "single", "complete", "average", "mcquitty",
               "median", "centroid", "ward.D2")
  if (method == "ward") {
    message("The \"ward\" method has been renamed to \"ward.D\"; note new avaible method \"ward.D2\"")
    method <- "ward.D"
  }
  i.meth <- pmatch(method, METHODS)
  if (is.na(i.meth))
    stop("invalid clustering method", paste("", method))
  if (i.meth == -1)
    stop("ambiguous clustering method", paste("", method))

  method <- METHODS[i.meth]
  if(!is.numeric(acc)) {stop("acc must be numeric!")}
  if(acc < 1) {
    acc <- 1;
    message("The minimal value allowed for acc is 1.")
  }

  if(class(D1)[1] == "hclustcompro_cl"){
    if(is.null(D1$D1) || is.null(D1$D2)){
      stop("We don't find dissmilarities matrices in hclustcompro_cl object.")
    }else{
      D2 <- D1$D2
      D1 <- D1$D1
    }
  }else{
    if(is.null(D1) || is.null(D2)){
      stop("We don't find dissmilarities in D1 and D2 arguments.")
    }
    #test
    if(!"dist" %in% class(D1)){
      #matrix ?
      if(!is.matrix(D1)){
        message("generation of a ceramic distances matrix.")
        D1 <- as.matrix(CAdist(D1,nPC="max",graph=FALSE))
      }
      #diag = 0 ?
      if(sum(diag(D1)) != 0){
        stop("D1: in distance matrix the diagonal is equal to 0.")
      }
      #symetric ?
      if(!isSymmetric(D1)){
        stop("D1 is not symmetric.")
      }
    }
    if(!"dist" %in% class(D2)){
      #matrix ?
      if(!is.matrix(D2)){
        if(length(D2[1,]) == 3){
          message("generation of a temporal dissimilarities matrix.")
          D2 <- overlap(D2)
        }else{
          message("generation of a stratigraphic dissimilarities matrix.")
          D2 <- adjacency(D2)
          #diag = 0 ?
          if(sum(diag(D2)) != 0){
            stop("D2: The diagonal must be equal to 0.")
          }
        }
      }

      #symetric ?
      if(!isSymmetric(D2)){
        stop("D2 is not symmetric.")
      }
    }
    if(!length(D1[,1]) == length(D1[1,])){stop("D1 not a square matrix.")}
    if(!length(D2[,1]) == length(D2[1,])){stop("D2 not a square matrix.")}
    #dist (positive matrix) ?
    if(min(D1) < 0){stop("D1 not a matrix dissimilarity (positive).")}
    if(min(D2) < 0){stop("D2 not a matrix distance (positive).")}
    if(median(D2) != 0 && median(D2) != 1){
      if(median(D1) == 0 || median(D1) == 1){
        message("We switch the two matrices. Stratigraphic data must be in D1 and ceramic data in D2.")
        tmp <- D1
        D1 <- D2
        D2 <- tmp
      }
    }
    #normalization
    if(max(D1) != 0){
      D1 <- D1 / max(D1)
    }
    if(max(D2) != 0){
      D2 <- D2 / max(D2)
    }
    #create mixing matrix
    D1 <- as.dist(D1)
    D2 <- as.dist(D2)
    D1 <- as.matrix(D1)
    D2 <- as.matrix(D2)
    #sort labels
    D1.Tmp <- D1[sort(labels(D1)[[1]]),sort(labels(D1)[[1]])]
    D2.Tmp <- D2[sort(labels(D2)[[1]]),sort(labels(D2)[[1]])]
    D1 <- as.matrix(D1.Tmp)
    D2 <- as.matrix(D2.Tmp)
  }

  save_lines <- NULL
  alpha_seq <- seq(0,1,0.01)

  D1save <- as.matrix(D1)
  D2save <- as.matrix(D2)

  #echantillonnage
  if(iter == "max"){iter <- length(D1save[1,])}
  if(!is.numeric(iter)){stop("iter must be numeric.")}
  if(iter < 1){iter <- 1; message("iter must be higher than 1.")
  warning("iter: must be between 1 and the number of elements.")}
  if(iter > length(D1save[1,])-1){
    if(iter > length(D1save[1,])){
      message("iter can't be higher than the number of element: ",length(D1save[1,]),".")
      warning("iter must be between 1 and the number of elements.")
    }
    iter <- length(D1save[1,])-1
  }

  cat("Resampling process:\n")
  #creation barre de progression(pb)
  pb <- utils::txtProgressBar(min = 0, max = length(D1save[1,]), style = 3)
  df <- NULL
  for (i in 1:length(D1save[1,])){
    for(j in sample((1:(length(D1save[,1])))[-i],iter,replace=FALSE)){
      if(iter == 1 && i == j){
        if(j == 1){j <- j + 1}else{j <- j - 1}
      }
      list <- generateClone(D1save,D2save,i,j)
      D1 <- list[[1]]
      D2 <- list[[2]]
      dist.dend <- c()
      for(alpha in alpha_seq){
        Mdist <- (alpha) * D1 + (1-alpha) * D2
        mixt.dist <- as.dist(Mdist)
        tree <- fastcluster::hclust(mixt.dist,method=method)
        new <- corCriterion(tree,D1,D2)
        dist.dend <- c(dist.dend,new)
      }
      if(is.null(df)){
        df <- data.frame(alpha = alpha_seq, height = dist.dend)
      }else{
        df <- cbind(df,dist.dend)
      }
    }
    mean <- unlist(lapply(as.data.frame(t(df[-1])),mean))
    if(is.null(save_lines)){
      save_lines <- cbind(df$alpha,mean)
    }else{
      save_lines <- cbind(save_lines,mean)
    }
    df <- NULL
    utils::setTxtProgressBar(pb, i)
  }
  close(pb)

  colnames(save_lines) <- c("alpha",rownames(D1save))
  rownames(save_lines) <- c()

  p <- plotly::plot_ly()
  for (k in 2:length(colnames(save_lines))) {
    p <- plotly::add_lines(p,x=save_lines[,1],y=save_lines[,k],name=colnames(save_lines)[k])
  }
  p
}

print.selectAlpha_obj <- function(x, ...){

  cat("        select Alpha | selectAlpha_obj

Estimated alpha:",x$alpha,"

call: ")
  print(x$call)
  cat("
value:
      .. $alpha             The estimate value of the parameter alpha (min CorCrit_alpha).
      .. $alpha.plot        The CorCrit for all the possible alpha.
    if resampling = TRUE
      .. $sd                The standard deviation.
      .. $conf              The confidence interval of alpha.
      .. $boxplot           The boxplot of the distribution.
      .. $values            All the potential alpha values obtained from clones.

documentation:
> ?hclustcompro_select_alpha
")
}

summary.selectAlpha_obj <- function(object, ...){
  cat("Estimate Alpha:",object$alpha,"    ( sd:",object$sd,")
Confidence interval: [",object$conf[1],";",object$conf[2],"]

Estimation calculated with",NROW(object$values),"observations.

?hclustcompro_select_alpha for details.
")
}

print.seriograph <- function(x, ...){
  cat("")
}

hclustcompro_select_alpha <- function(D1,D2,acc = 2, resampling = TRUE, method = "ward.D2",iter = 5, suppl_plot = TRUE){

  #===============================================================================
  # CAH CLASSIFICATION USING TWO DIST MATRIX
  #
  # Authors : A.COULON, L.BELLANGER and P.HUSI
  # Last update : 08 avril 2019
  #
  # Arguments:
  # D1	                  The fisrt distance matrix. Archeological context: Stratigraphic distance.
  # D2	                  The second distance matrix. Archeological context: ceramic distance.
  # acc             	    The accuracy to round.
  # resampling            Logical if we have to calculate confidence interval with resampling strategy
  # method                The method to use in hclust (see doc)
  #
  #===============================================================================

  METHODS <- c("ward.D", "single", "complete", "average", "mcquitty",
               "median", "centroid", "ward.D2")
  if (method == "ward") {
    message("The \"ward\" method has been renamed to \"ward.D\"; note new avaible method \"ward.D2\"")
    method <- "ward.D"
  }
  i.meth <- pmatch(method, METHODS)
  if (is.na(i.meth))
    stop("invalid clustering method", paste("", method))
  if (i.meth == -1)
    stop("ambiguous clustering method", paste("", method))
  method <- METHODS[i.meth]

  corCriterion_ <- function(alpha) {
    res <- c()
    for (elt in alpha){
      Mdist <- (elt)*D1 + (1-elt)*D2
      mixt.dist <- as.dist(Mdist)
      tree <- fastcluster::hclust(mixt.dist,method=method)
      d2 <- stats::cophenetic(tree)
      res <- c(res,abs(cor(as.dist(D1),d2) - cor(as.dist(D2),d2)))
    }
    return(res)
  }

  cor_ <- function(alpha) {
    c1 <- c()
    c2 <- c()
    for (elt in alpha){
      Mdist <- (elt)*D1 + (1-elt)*D2
      mixt.dist <- as.dist(Mdist)
      tree <- fastcluster::hclust(mixt.dist,method=method)
      d2 <- stats::cophenetic(tree)
      c1 <- c(c1, cor(as.dist(D1),d2))
      c2 <- c(c2, cor(as.dist(D2),d2))
    }
    return(list(c1,c2))
  }

  corCriterion_clone <- function(alpha,a,b) {
    res <- c()
    D1bis <- rbind(D1,clone=D1[a,])
    D1bis <- cbind(D1bis,clone=c(D1[a,],0))
    D2bis <- rbind(D2,clone=D2[b,])
    D2bis <- cbind(D2bis,clone=c(D2[b,],0))
    for (elt in alpha){
      Mdist <- (elt)*D1bis + (1-elt)*D2bis
      mixt.dist <- as.dist(Mdist)
      tree <- fastcluster::hclust(mixt.dist,method=method)
      d2 <- stats::cophenetic(tree)
      res <- c(res,abs(cor(as.dist(D1bis),d2) - cor(as.dist(D2bis),d2)))
    }
    return(res)
  }

  if(!is.numeric(acc)) {stop("acc must be numeric!")}
  if(acc < 1) {
    acc <- 1;
    message("The minimal value allowed for acc is 1.")
  }
  seq_acc=0.001
  if(acc==1){seq_acc=0.1}
  if(acc==2){seq_acc=0.01}
  if(acc==3){seq_acc=0.001}
  #test
  if(!"dist" %in% class(D1)){
    #matrix ?
    if(!is.matrix(D1)){
      message("generation of distances matrix.")
      D1 <- as.matrix(CAdist(D1,nPC="max",graph=FALSE))
    }
    #diag = 0 ?
    if(sum(diag(D1)) != 0){
      stop("D1: in distance matrix the diagonal is equal to 0.")
    }
    #symetric ?
    if(!isSymmetric(D1)){
      stop("D1 is not symmetric.")
    }
    if(!length(D1[,1]) == length(D1[1,])){stop("D1 not a square matrix.")}
  }
  if(!"dist" %in% class(D2)){
    #matrix ?
    if(!is.matrix(D2)){
      if(length(D2[1,]) == 3){
        message("generation of a temporal dissimilarities matrix.")
        D2 <- overlap(D2)
      }else{
        message("generation of a stratigraphic dissimilarities matrix.")
        D2 <- adjacency(D2)
        #diag = 0 ?
        if(sum(diag(D2)) != 0){
          stop("D2: The diagonal must be equal to 0.")
        }
      }
    }
    #symetric ?
    if(!isSymmetric(D2)){
      stop("D2 is not symmetric.")
    }
    if(!length(D2[,1]) == length(D2[1,])){stop("D2 not a square matrix.")}
  }

  D1 <- as.matrix(D1)
  D2 <- as.matrix(D2)
  #dist (positive matrix) ?
  if(min(D1) < 0){stop("D1 not a matrix dissimilarity (positive).")}
  if(min(D2) < 0){stop("D2 not a matrix distance (positive).")}
  if(median(D2) != 0 && median(D2) != 1){
    if(median(D1) == 0 || median(D1) == 1){
      message("We switch the two matrices. Stratigraphic data must be in D1 and ceramic data in D2.")
      tmp <- D1
      D1 <- D2
      D2 <- tmp
    }
  }
  #normalization
  if(max(D1) != 0){
    D1 <- D1 / max(D1)
  }
  if(max(D2) != 0){
    D2 <- D2 / max(D2)
  }
  #create mixing matrix
  D1 <- as.dist(D1)
  D2 <- as.dist(D2)
  D1 <- as.matrix(D1)
  D2 <- as.matrix(D2)
  #sort labels
  D1.Tmp <- D1[sort(labels(D1)[[1]]),sort(labels(D1)[[1]])]
  D2.Tmp <- D2[sort(labels(D2)[[1]]),sort(labels(D2)[[1]])]
  D1 <- as.matrix(D1.Tmp)
  D2 <- as.matrix(D2.Tmp)

  #resampling check
  if(iter == "max"){iter <- length(D1[1,])}
  if(!is.numeric(iter)){stop("iter must be numeric.")}
  if(!is.logical(resampling)){stop("resampling must be TRUE or FALSE.")}
  if(iter < 1){iter <- 1; message("iter must be higher than 1.")
  warning("iter: must be between 1 and the number of elements.")}
  if(iter > length(D1[1,])-1){
    if(iter > length(D1[1,])){
      message("iter can't be higher than the number of element: ",length(D1[1,]),".")
      warning("iter: must be between 1 and the number of elements.")
    }
    iter <- length(D1[1,])-1
  }

  if(suppl_plot){
    #estime alpha
    corCrit <- c()
    for(i in seq(0,1,seq_acc)){corCrit <- c(corCrit,corCriterion_(i))}
    index <- which(corCrit == min(corCrit))
    alpha <- seq(0,1,seq_acc)[index]

    add_text_resamp <- ""
    if(resampling){
      #echantillonnage
      cat("\nResampling process:\n")
      pb <- utils::txtProgressBar(min = 0, max = length(D1[1,]), style = 3)
      set <- c()
      res <- c()
      for (i in 1:length(D1[1,])){
        for(j in sample((1:(length(D1[,1])))[-i],iter,replace=FALSE)){
          if(i!=j){
            tmp <- stats::optimize(corCriterion_clone,lower=0,upper=1,a=i,b=j)
            set <- c(set,tmp$minimum)
          }
        }
        utils::setTxtProgressBar(pb, i)
      }

      res <- set
      sd <- arrondi(sd(res),acc)
      box <- quantile(res,c(0,0.025,0.25,0.5,0.75,0.975,1))
      conf <- c(arrondi(box[2],acc),arrondi(box[6],acc))
      values <- arrondi(res,acc)

      BoxPlotValue <- boxplot(
        res,
        horizontal = TRUE,
        range = 0,
        main="boxplot: Distribution of all the potentials values of alpha.",
        xlab=expression(alpha),
        xaxt="n"
      )
      axis(
        side = 1,
        at = c(box[1],box[3],box[4],box[5],box[7]),
        labels = arrondi(c(box[1],box[3],box[4],box[5],box[7]),acc)
      )
      points(arrondi(mean(res),acc),1,col="red")
      text(arrondi(mean(res),acc),1.3,
           bquote(hat(alpha)^"*" == .(arrondi(mean(res),acc))))
      boxplot <- grDevices::recordPlot()

      add_text_resamp <- paste(" The IC95% is calculated with",iter,'*',length(D1[1,]),
                               "clones out of",length(D1[1,])-1,"*",length(D1[1,]),"available.")
    }


    plot(
      seq(0,1,seq_acc),corCriterion_(seq(0,1,seq_acc)),type="l",
      xlab = expression(alpha),
      ylab = expression(CorCrit[alpha]),
      ylim=c(0,1),
      axes = TRUE,
      main = "Select alpha",
      sub = paste0("method: ",method,".",add_text_resamp),
      xaxp = c(0,1,5),
      mgp= c(2, 1,0),
      col = "slateblue2",
      xaxt= "n",
      lwd=1
    )
    correlation_curve <- cor_(seq(0,1,seq_acc))
    lines(seq(0,1,seq_acc),correlation_curve[[1]],lty=3,col="grey60")
    lines(seq(0,1,seq_acc),correlation_curve[[2]],lty=3,col="grey60")
    axis(
      side = 1,
      at = c(.1,.2,.3,.4,.5,.6,.7,.8,.9),
      labels = rep("",9),
      col = "grey"
    )
    axis(
      side = 1,
      at = arrondi(alpha,acc),
      labels = rep("",length(alpha)),
      col = "slateblue2"
    )
    if(resampling){
      axis(
        side = 1,
        at = c(0,arrondi(box[2],acc),arrondi(box[6],acc),1),
        labels = c(0,arrondi(box[2],acc),arrondi(box[6],acc),1)
      )
      abline(v = arrondi(box[6],acc),col = "gray50",lty=3,lwd=2)
      abline(v = arrondi(box[2],acc),col = "gray50",lty=3,lwd=2)
      close(pb)
    }else{
      axis(
        side = 1,
        at = c(0,1),
        labels = c(0,1)
      )
    }

    for(i in 1:length(alpha)){
      graphics::mtext(arrondi(alpha[i],acc),side = 1,line = .4,at = arrondi(alpha[i],acc),col = "slateblue2")
    }
    alpha.plot <- grDevices::recordPlot()

    if(resampling){
      return(structure(
        list(alpha = arrondi(alpha,acc),
             alpha.plot = alpha.plot,
             sd = sd,
             conf = conf,
             boxplot = boxplot,
             values = values
        ),
        class = c("selectAlpha_obj","list")
      )
      )
    }else{
      return(structure(
        list(alpha = arrondi(alpha,acc),
             alpha.plot = alpha.plot,
             more = "Need resampling = TRUE!"
        ),
        class = c("selectAlpha_obj","list")
      )
      )
    }
  }else{

    #estime alpha
    #alpha <- stats::optimize(corCriterion_,lower=0,upper=1)$minimum
    corCrit <- c()
    for(i in seq(0,1,seq_acc)){corCrit <- c(corCrit,corCriterion_(i))}
    index <- which(corCrit == min(corCrit))
    alpha <- seq(0,1,seq_acc)[index]

    if(resampling){
      pb <- utils::txtProgressBar(min = 0, max = length(D1[1,]), style = 3)
      set <- c()
      res <- c()
      for (i in 1:length(D1[1,])){
        for(j in sample((1:(length(D1[,1])))[-i],iter,replace=FALSE)){
          if(i!=j){
            tmp <- stats::optimize(corCriterion_clone,lower=0,upper=1,a=i,b=j)
            set <- c(set,tmp$minimum)
          }
        }
        utils::setTxtProgressBar(pb, i)
      }
      conf <- arrondi(quantile(set,c(0.025,0.975)),acc)
      close(pb)
      return(structure(
        list(alpha = arrondi(alpha,acc),
             conf = conf,
             more = "Need suppl_plot = TRUE!"
        ),
        class = c("selectAlpha_obj","list")
      )
      )
    }else{
      return(structure(
        list(alpha = arrondi(alpha,acc),
             more = "Need suppl_plot = TRUE!"
        ),
        class = c("selectAlpha_obj","list")
      )
      )
    }


  }


}

timerange <- function(data, cluster = NULL, add = NULL, density = NULL, color = NULL, reorder = FALSE){

  #test
  if(is.null(cluster)){cluster <- rep(1,dim(data)[1])}
  if(!is.data.frame(data)){stop("data must be a data.frame.")}
  if(!is.null(add)){
    if(!is.data.frame(add)){stop("add must be a data.frame (even if there is only one column).")}
  }
  if(!is.vector(cluster)){stop("cluster must be a vector.")}
  if(!is.null(density)){
    if(!is.vector(density)){stop("cluster must be a vector.")}
  }

  #call fonction overlap plot
  df <- cbind(data,cluster)
  res <- overlap_plot(df,add,density,color,reorder_color = reorder)

  return(structure(list(order = res$order, plot = res$plot, density = res$density, cluster = res$reorder_cluster), class = c("temp_obj", "list")))
}

histogram <- function(df, cluster = NULL, order = NULL, k = 4, reorder_color = FALSE){
  ################################################################################################
  # df : Borne inf, Borne sup, Observation (ici GT), Nombre d'Observation (ici NTI), Numéro de groupe
  # k : les k observations les plus représentees
  ################################################################################################

  Cluster <- c()
  reorder <- TRUE

  # df <- data_NTI[,1:3]
  # cluster <- data_NTI[,4]
  # order <- order_NTI
  # k = 4

  #|
  #| check inputs
  #|

  if(is.null(cluster)){cluster <- rep(1,dim(df)[1])}

  if(length(df[1,]) == 2){
    df <- data.frame(
      niv1 <- df[,1],
      niv2 <- df[,1],
      Observation_number <- df[,2]
    )
  }

  if(k < 1){k<-1}
  df <- cbind(df,cluster)
  names(df) = c("niv1", "niv2", "Observation_number", "Cluster")
  ncol <- length(unique(trunc(df$Cluster)))
  df$Cluster = as.factor(df$Cluster)

  if(is.null(order)){
    order <- unique(df$Cluster)
    reorder <- FALSE
  }

  ### on ordonne
  df_order = {}
  for(i in order){df_order = rbind(df_order, df[df$Cluster == i,])}
  df = df_order

  if(reorder){
    ## Re numerotation (ancien au recent)
    order_tmp <- as.numeric(order)
    Cluster_tmp <- as.numeric(as.character(df$Cluster))

    new <- c()
    count <- 1
    for (i in 1:length(order_tmp)){
      if(order_tmp[i] == trunc(order_tmp[i])){
        new <- c(new,count)
        count <- count + 1
      }
      if(order_tmp[i] != trunc(order_tmp[i])){
        t <- order_tmp[i]
        if(i != length(order_tmp)){
          tun <- order_tmp[i+1]
        }else{
          tun <- 0
        }
        if(trunc(t) == trunc(tun)){
          new <- c(new,count+order_tmp[i]-trunc(t))
        }else{
          new <- c(new,count+order_tmp[i]-trunc(t))
          count <- count + 1
        }
      }
    }
    new <- sort(new)
    Cluster_tmp <- Cluster_tmp * 10
    for(i in 1:length(new)){
      Cluster_tmp[Cluster_tmp == 10*order_tmp[i]] <- new[i]
    }
    df$Cluster <- as.factor(Cluster_tmp)
  }


  if(sort(unique(as.character(df$niv1) == as.character(df$niv2)))){
    #### On définit les couleurs

    col <- rainbow_hcl(ncol,
                       c = 80, l = 60, start = 0,
                       end = 360*(ncol-1)/ncol)

    plot <- list()
    for(i in unique(df$Cluster)){
      ## On recupere les donnees correspondant aux groupes i
      dg = df[df$Cluster == i,]

      ## On regroupe les données en fonction de Observations et on compte le nombre de Observation_number
      ## On calcule la proportion
      dg = dg %>% group_by(niv1) %>% summarise(Observation_number = sum(Observation_number),
                                               Cluster = first(Cluster)) %>% arrange(-Observation_number)
      dg$sum_Observations = sum(dg$Observation_number)
      dg$Proportion = dg$Observation_number / dg$sum_Observations

      data <- dg[order(dg$Proportion,decreasing=T),][1:k,]

      data$niv1 = factor(data$niv1, levels = data$niv1)
      col_index <- trunc(as.numeric(i))

      p1 <- plot_ly(data, x = data$niv1, y = data$Proportion, type = "bar",hoverinfo = 'text',
                    textposition = 'auto',showlegend = F,
                    hovertext = paste("</br>", data$niv1,
                                 "</br> Proportion :",arrondi(data$Proportion,3),
                                 "</br> Number of observation :",data$Observation_number,
                                 "</br> Cluster :", data$Cluster
                    ),
                    marker = list(
                      color = col[col_index]
                    )
      ) %>%
        layout(
          title = "",
          yaxis = list(title = "Proportion", showline = FALSE, dtick = .1,range= c(0,1)),
          xaxis = list(title = paste("Class",(as.numeric(as.character(data$Cluster)))),tickangle = 90)
        )

      plot[[i]] <- p1
    }

    plot2 <- subplot(plot,shareY = TRUE,shareX=TRUE,margin = 0.003)
    plot2
    return(structure(list(plot = plot2), class = c("prop_obj", "list")))

  }else{
    plot <- list()
    for(i in unique(df$Cluster)){
      #### On définit les couleurs
      col <- rainbow_hcl(length(unique(trunc(as.numeric(cluster)))),
                         c = 80, l = 60, start = 0,
                         end = 360*(length(unique(trunc(as.numeric(cluster))))-1) /
                           (length(unique(trunc(as.numeric(cluster))))))

      ## On recupere les donnees correspondant aux groupes i
      dcluster = df[df$Cluster == i,]
      sum_Observations = sum(dcluster$Observation_number)

      nb <- dcluster %>% group_by(niv1) %>% summarise(n = length(unique(niv2)),sum = sum(Observation_number))
      max <- length(nb$n)
      if(k > max){k_tmp <- max}else{k_tmp <- k}
      nb <- nb[order(nb$sum,decreasing=TRUE),][1:k_tmp,]

      clustplot <- list()
      if(k > 10){k <- 10}
      for(l in 1:k){
        if(l > k_tmp){
          p1 <- plot_ly(x= "",y = 0, type = "bar",hoverinfo = 'text',
                        showlegend = F,
                        marker = list(
                          color = col[1],
                          line = list(
                            color = 'black',
                            width = 1
                          )
                        )
          )
        }else{
          test <- dcluster[dcluster$niv1 == as.character(nb$niv1)[l],] %>% group_by(niv2) %>%
            summarise(nb=sum(Observation_number))
          test$nb <- test$nb / sum_Observations
          test$niv2 = factor(test$niv2, levels = test$niv2)
          test <- test[order(test$nb,decreasing=TRUE),]

          p1 <- plot_ly(test, x = as.character(nb$niv1)[l], y = test$nb, type = "bar", hoverinfo = 'text',
                        textposition = 'auto',showlegend = F,
                        hovertext = paste("</br>", as.character(nb$niv1)[l],
                                     " -- ", test$niv2,
                                     "</br> Proportion :",arrondi(test$nb,3),
                                     "</br> Number of observation :",arrondi(test$nb * sum_Observations,0),
                                     "</br> Cluster :", i
                        ),
                        marker = list(
                          color = col[as.numeric(i)],
                          line = list(
                            color = 'white',
                            width = 1
                          )
                        )
          )
          p1 <- p1 %>% layout(yaxis = list(title = 'Proportion'), barmode = 'stack',
                              title = "",
                              xaxis = list(title = "", tickangle = -45),
                              yaxis = list(title = "Proportion", showline = FALSE, dtick = .1,range= c(0,1)))

        }
        clustplot[[l]] <- p1
        plot[[i]] <- subplot(clustplot,shareY = TRUE,shareX=TRUE,margin = 0)
        }

    }
    if(k*length(unique(df$Cluster)) > 99){nr <- 2;margin <- c(0.004,0.004,.05,.05)}else{nr <- 1;margin <- 0.004}
    plot2 <- subplot(plot, shareY = TRUE, shareX = FALSE, margin = margin,nrows=nr)
    plot2
    return(structure(list(plot = plot2), class = c("prop_obj", "list")))
  }
}

#overload hclust
hclust <- function(d, method = "complete", members = NULL, d2 = NULL, alpha = NULL){
  if (!is.null(d2)) {
    if(!length(d) == length(d2)){stop("d and d2 have not the same size.")}
    if (is.null(alpha)) {
      sa <- hclustcompro_select_alpha(d, d2, method = method, resampling = FALSE)
      alpha <- sa$alpha[1]
    }
    alpha <- as.numeric(alpha)
    if(!(alpha>=0 & alpha<=1)){
      warning("alpha must be between 0 and 1.")
      sa <- hclustcompro_select_alpha(d, d2, method = method, resampling = FALSE)
      alpha <- sa$alpha[1]
    }
    #normalization
    if(max(d) != 0){
      d <- d / max(d)
    }
    if(max(d2) != 0){
      d2 <- d2 / max(d2)
    }
    d <- as.dist(alpha * d + (1 - alpha) * d2)
  }
  fastcluster::hclust(d, method, members)
}
Any scripts or data that you put into this service are public.
SPARTAAS documentation built on June 27, 2024, 5:06 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
SPARTAAS
Statistical Pattern Recognition and daTing using Archaeological Artefacts assemblageS

R/perioclust.R
In SPARTAAS: Statistical Pattern Recognition and daTing using Archaeological Artefacts assemblageS

Defines functions hclustcompro_select_alpha print.seriograph summary.selectAlpha_obj print.selectAlpha_obj hclustcompro_detail_resampling print.hclustcompro_cl seriograph hclustcompro_subdivide hclustcompro overlap adjacency CAdist

Documented in adjacency CAdist hclustcompro hclustcompro_select_alpha hclustcompro_subdivide overlap seriograph

Try the SPARTAAS package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

SPARTAAS Statistical Pattern Recognition and daTing using Archaeological Artefacts assemblageS

R/perioclust.R In SPARTAAS: Statistical Pattern Recognition and daTing using Archaeological Artefacts assemblageS

Defines functions hclustcompro_select_alpha print.seriograph summary.selectAlpha_obj print.selectAlpha_obj hclustcompro_detail_resampling print.hclustcompro_cl seriograph hclustcompro_subdivide hclustcompro overlap adjacency CAdist

Documented in adjacency CAdist hclustcompro hclustcompro_select_alpha hclustcompro_subdivide overlap seriograph

Try the SPARTAAS package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

SPARTAAS
Statistical Pattern Recognition and daTing using Archaeological Artefacts assemblageS

R/perioclust.R
In SPARTAAS: Statistical Pattern Recognition and daTing using Archaeological Artefacts assemblageS