#' Hierarchical clustering of the NISP data
#' @name zoo_hc
#' @description Hierarchical clustering of the NISP data by periods
#'
#' @param df a dataframe
#' @param num_column the column name of assemblage numbers
#' @param site_column the column name of assemblage sites
#' @param period_column the column name of assemblage periods
#' @param percBOTA_column the column name of assemblage percents of BOTA
#' @param percOC_column the column name of assemblage percents of OC
#' @param percSUDO_column the column name of assemblage percents of SUDO
#' @param typSite_column the column name of assemblage site types
#' @param hc.meth the used HC method
#' @param hdend the maximum height of the dendrogram
#' @param labels_cex the size of the labels (site numbers)
#' @param leaves_cex the size of the symbols (site symbols)
#' @param axis_text the size of the axis labels
#' @param per.label.ang the angle of the period label
#' @param per.label.sz the size of the period label
#' @param lorder_period a vector with the ordered periods
#' @param typsit_symb a dataframe with the symbology
#' @param export.plot if TRUE, save the plot
#' @param hc.name the name of the output plot if saved
#' @param plot.width,plot.height,plot.dpi the dimensions and resolution of the plot if saved
#' @param verbose if TRUE (default): verbose
#'
#' @return a gglot with the different HC by period
#'
#' @examples
#'
#' df <- zoo_read()
#' lorder_period <- zoo_order_period(df)
#' typsit_symb <- zoo_legends(worksheets = c("sites_types"))
#' zoo_hc(df = df, lorder_period = lorder_period, typsit_symb = typsit_symb,
#' dir.out = "C:/Rprojects/zoowork/www/",
#' plot.width = 6, plot.height = 10,
#' per.label.sz = 2)
#'
#' @export
zoo_hc <- function(df = NA,
num_column = "num",
site_column = "site",
period_column = "period",
percBOTA_column = "percBOTA",
percOC_column = "percOC",
percSUDO_column = "percSUDO",
typSite_column = "type.site",
hc.meth = "complete",
hdend = 130,
labels_cex = .3,
leaves_cex = 1.2,
axis_text = 4,
per.label.ang = 270,
per.label.sz = 3,
lorder_period = NA,
typsit_symb = NA,
export.plot = T,
dirOut = paste0(system.file(package = "zoowork"), "/results/"),
hc.name = "hc.png",
plot.width = 19.5,
plot.height = 22.2,
plot.dpi = 300,
verbose = TRUE){
ldendro <- list() # to store the trees
lmaxheight <- c() # to store the max height of the tree
ldendro_tsit <- list()
lheights_tsit <- c()
# the following part is ~ the same as zoo_ca()
# - - - - - - - - - - - - - - -
perCA_tsit <- data.frame(perCA1 = 0,
perCA2 = 0,
per = "xx")
ca_all_tsite <- data.frame(num = 'xx',
site = 'xx',
type.site = 'xx',
CA1 = 0,
CA2 = 0,
percBOTA = 0,
percSUDO = 0,
percOC = 0,
per = 'xx',
shape = 0,
color = 'xx')
for (per in lorder_period){
# per <- "MIA2"
if(verbose){print(per)}
df_per <- df[df[ , period_column] %in% per,] # sélectionne sur périodes
df_per <- df_per[complete.cases(df_per), ]
row.names(df_per) <- df_per[ , num_column]
df_lda.per <- df_per[ , c(percBOTA_column, percSUDO_column, percOC_column, typSite_column)]
if(nrow(df_per) > 3){
if(verbose){print(" - run HC")}
xdat <- df_lda.per[ , -which(names(df_lda.per) %in% c(typSite_column))]
ca <- FactoMineR::CA(xdat,graph = FALSE) # AFC
inertCA1 <- round(as.numeric(ca$eig[, 2][1]), 1)
inertCA2 <- round(as.numeric(ca$eig[, 2][2]), 1)
# show %
perCA_tsit <- rbind(perCA_tsit, data.frame(perCA1 = inertCA1,
perCA2 = inertCA2,
per = per))
coords_ind_ca <- as.data.frame(ca$row$coord)
coords_var_ca <- as.data.frame(ca$col$coord)
coords_ca <- rbind(coords_ind_ca,coords_var_ca)
colnames(coords_ca)[1] <- 'CA1'
colnames(coords_ca)[2] <- 'CA2'
dataset.p <- merge(df_lda.per, coords_ca, by = "row.names", all.y = T)
dataset.ps <- merge(dataset.p, typsit_symb, by.x = typSite_column, by.y = "tsite", all.x = T)
dataset.ps$per <- per
dataset.ps$color <- as.character(dataset.ps$color)
# - - - - - - - - - - - - - - -
dataset.ps$shape <- as.factor(dataset.ps$shape)
names(dataset.ps)[names(dataset.ps) == 'Row.names'] <- num_column
df_per_site <- df_per[ , c(site_column, num_column)]
ff <- merge(dataset.ps, df_per_site, by = num_column, all.x = T)
# ff <- ff[ , colnames(ca_all_tsite)]
matches <- colnames(ca_all_tsite) # reorder
ff <- ff[ ,match(matches, colnames(ff))]
ca_all_tsite <- rbind(ca_all_tsite,ff)
dd <- na.omit(ff) # rm var
rownames(dd) <- dd[ , num_column]
dd[ , num_column] <- NULL
xxdat <- dd[ , c(percBOTA_column, percSUDO_column, percOC_column)]
symb_dd <- merge (xxdat, ff, by.x = "row.names", by.y = num_column)
colnames(symb_dd)[which(names(symb_dd) == paste0(percBOTA_column, ".x"))] <- percBOTA_column
colnames(symb_dd)[which(names(symb_dd) == paste0(percOC_column, ".x"))] <- percOC_column
colnames(symb_dd)[which(names(symb_dd) == paste0(percSUDO_column, ".x"))] <- percSUDO_column
rownames(symb_dd) <- symb_dd$Row.names
#colnames(symb_dd)[which(names(symb_dd)=="Row.names")] <- 'num'
symb_dd[ , paste0(percBOTA_column, ".y")] <- NULL
symb_dd[ , paste0(percOC_column, ".y")] <- NULL
symb_dd[ , paste0(percSUDO_column, ".y")] <- NULL
# symb_dd$Row.names <- symb_dd$percBOTA.y <- symb_dd$percSUDO.y <- symb_dd$percOC.y <- NULL
dend1 <- symb_dd[ , c(percBOTA_column, percSUDO_column, percOC_column)] %>%
dist %>% hclust(method = hc.meth)
ldendro[[length(ldendro) + 1]] <- dend1 # store
lmaxheight <- c(lmaxheight, max(dend1$height))
dend1 <- dend1 %>% as.dendrogram # cr?? un 'dendrogram'
# r?ordonne le tab de donn?es apr?s le clustering
symb_dd$ord <- seq(1, nrow(symb_dd)) # ord initial
symb_dd <- symb_dd[match(order.dendrogram(dend1), symb_dd$ord), ] # reord
dend1 <- dend1 %>%
dendextend::set("branches_lwd", .2) %>%
dendextend::set("labels_cex", labels_cex) %>% #.3
dendextend::set("leaves_pch", as.numeric(as.character(symb_dd$shape))) %>% # node point type
dendextend::set("leaves_cex", leaves_cex) %>% # 1.2
dendextend::set("leaves_col", symb_dd$color)
ggd1 <- dendextend::as.ggdend(dend1) # tranform en 'ggdend'
ggdend <- ggplot2::ggplot(ggd1,
horiz = TRUE,
theme = ggplot2::theme_minimal(),
offset_labels = -8) +
# ggplot2::ylim(hdend, 0) + # fixe l'emprise pour comparer plusieurs dendro
ggplot2::theme(panel.grid.major.y = ggplot2::element_blank(),
panel.grid.minor.y = ggplot2::element_blank(),
axis.text.y = ggplot2::element_blank(),
axis.title.y = ggplot2::element_blank(),
axis.title.x = ggplot2::element_blank(),
axis.text.x = ggplot2::element_text(size = axis_text)) +
ggplot2::annotate(geom = "text", # write the period
x = as.integer(nrow(symb_dd)/2), # au milieu du jeu de donn?es
y = hdend,
vjust = 0,
hjust = 0,
angle = per.label.ang,
label = unique(symb_dd$per),
size = per.label.sz) +
ggplot2::theme(plot.margin = ggplot2::unit(c(0, 0, 0, 0), "pt"))+
ggplot2::scale_y_continuous(breaks = seq(hdend, 10, by = -20))
ldendro_tsit[[length(ldendro_tsit) + 1]] <- ggdend # add dendro
lheights_tsit <- c(lheights_tsit, nrow(xxdat)) # dendro size
} else {
if(verbose){
print(paste0("There's only ", nrow(df_per),
" individual in the dataframe, no HC can be computed"))
}
}
}
if(export.plot){
dir.create(dirOut, showWarnings = FALSE)
gout <- paste0(dirOut, hc.name)
ggplot2::ggsave(file = gout,
gridExtra::arrangeGrob(grobs = ldendro_tsit, ncol = 1, heights = lheights_tsit),
width = plot.width, height = plot.height, units = "cm",
dpi = plot.dpi) ## save plot
if(verbose){print(paste0("The plot '", hc.name,"' has been saved in '", dirOut,"'"))}
} else {
return(ldendro_tsit)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.