R/LVIclus.R

Defines functions LVIclust

Documented in LVIclust

#'@title Local variables importance (LVI) clustering from GWRFC outputs
#'@description This function clusters local variables importance (LVI) output with different methods, including: Gaussian Mixture Modelling (mclust), Kohonen's Self-Organizing Maps (kohonen), or hierarchical clustering (hclust)
#'@param input_LVI string or Spatial-class. Input shapefile of GWRFC LVI output. It can be the filename of the shapefile or an object of class SpatialPolygonsDataFrame or SpatialPointsDataFrame.
#'@param remove_columns string. Remove specific variables from \strong{input_LVI}. Variables are identified by column name. NA ignores column remove. By default, column "ID_row" is removed.
#'@param method_clustering string. A method to use for clustering. It can be:"ward.D","ward.D2","single","complete","average","mcquitty","median", "centroid", "mclust" or "SOM". The latter, is calculated with the kohonen library.
#'@param ncluster numeric. Number of clusters to apply. Should be more than 2.
#'@param plots logical. If TRUE, plots from clustering libraries are generated and stored at \strong{output_folder} (except for mclust).
#'@param output_folder string. Output folder where LVIclust outputs will be stored.
#'@return The output of this function is a shapefile with the resulting clusters and its plot if it was specified.
#'@examples
#'
#'#based in the example showed with the execution of GWRFC
#'
#'LVIclust(input_LVI = "E:/demo/deforestation/GWRFC_ADP_400_EX_LVI.shp", #filename of the GWRFC LVI output
#'remove_columns=NA,
#'method_clustering="mclust", #hierarchical clustering is applied here.
#'ncluster = 2, #number of clusters.
#'plots=T, #available only for all hierarchical clustering methods and kohonen.
#'output_folder = "E:/demo/deforestation") #check this folder for outputs generated by the function.
#'
#'@export

LVIclust <- function(
  input_LVI,
  remove_columns=NA,
  method_clustering="ward.D2",
  ncluster=2,
  plots=T,
  output_folder
){

  ##### PREPARE DATA #####

  print("Reading data...")

  #random + test
  set.seed(666)
  if(ncluster==1){
    stop("Clustering can´t be less than 2")
  }
  #folder
  dir.create(output_folder,showWarnings = F, recursive = T)
  #read LVI
  if(class(input_LVI)=="SpatialPolygonsDataFrame"|class(input_LVI)=="SpatialPointsDataFrame"){
    lvi.shp <- input_LVI
    na.gwrfc <- complete.cases(lvi.shp@data)
    lvi.shp <- lvi.shp[na.gwrfc,]
  }else{
    lvi.shp <- shapefile(input_LVI)
    na.gwrfc <- complete.cases(lvi.shp@data)
    lvi.shp <- lvi.shp[na.gwrfc,]
  }
  #remove columns?
  if(!is.na(remove_columns)[1]){
    if(length(grep(paste(remove_columns,collapse="|"),names(lvi.shp))) != 0){
      lvi.shp <- lvi.shp[,!names(lvi.shp) %in% remove_columns]
    }else{
      stop("remove_columns not found at input_shapefile. Verify its names.")
    }
  }

  #### FUNCTIONS ####

  plot.SOM <- function(x){
    if(ncol(x)!=9){
      x.na <- 9-length(x)
      x.na <- lapply(x.na,function(y){
        y <- rep(NA,nrow(x))
      })
      x <- cbind(x,as.data.frame(x.na))
    }
    jpeg(output.name, width = 1000, height = 1000)
    par(mfrow=c(3,3))
    for(j in 1:9){
      if(all(is.na(x[,j]))){
        plot(0,type='n',axes=FALSE,ann=FALSE)
      }else{
        plot(som.model,
             type = "property",
             property=x[,j],
             main=names(x)[j],
        )
      }
    }
    dev.off()
  }

  #### CLUSTERING ####

  print("Start clustering...")


  # CHECK CLUSTER NUMBER

  if(ncluster <= 1){
    stop("ncluster can´t be less than 2")
  }else if(is.character(ncluster)){
    stop("ncluster should be numeric")
  }

  # PREPARE DATA

  lvi.clus <- lvi.shp@data[,2:ncol(lvi.shp@data)]
  clus.shp <- lvi.shp[,1,drop=F]

  # MCCLUST

  if(method_clustering=="mclust"){
    #apply mclust
    mc.cluster <- Mclust(lvi.clus, G=ncluster)
    #assign CLUSTER
    clus.shp@data$CLUSTER <- mc.cluster$classification

  }else if(method_clustering=="SOM"){
    #apply SOM
    som.grid <- somgrid(xdim = 5, ydim=5, topo="hexagonal")
    som.data <- as.matrix(scale(lvi.clus))
    som.model <- som(som.data, grid = som.grid)
    som.cluster <- cutree(hclust(dist(unlist(som.model$codes))),k=ncluster)
    #plot SOM quality
    if(plots){
      som.plots <- c("codes", "changes", "counts","dist.neighbours", "mapping", "quality")
      output.name <- paste0(output_folder,"/LVI_",ncluster,"_SOM_quality.jpg")
      jpeg(output.name, width = 1000, height = 1000)
      par(mfrow=c(3,3))
      for(j in 1:7){
        if(j!=7){
          plot(som.model, type=som.plots[j])
        }else{
          plot(som.model,
               type = "property",
               property=som.cluster,
               main="clusters")
        }
      }
      dev.off()
      #plot LVI
      som.plots <- names(lvi.shp@data[,2:ncol(lvi.shp@data)])
      som.plots <- split(som.plots, ceiling(seq_along(som.plots)/9))
      for(j in 1:length(som.plots)){
        output.name <- paste0(output_folder,"/LVI_",ncluster,"_SOM_variables_",j,".jpg")
        if(length(som.plots[[j]])==9){
          plot.SOM(lvi.shp@data[,som.plots[[j]]])
        }
      }
    }
    #assign CLUSTER
    clus.shp@data$CLUSTER <- som.cluster[som.model$unit.classif]

  }else{

    #apply HC

    #get recommended clusters
    hc.cluster <- hclust(dist(lvi.clus), method = method_clustering)
    hc.class <- cutree(hc.cluster, k = ncluster)
    if(plots){
      output.name <- paste0(output_folder,"/LVI_",ncluster,"_HC_tree.jpg")
      jpeg(output.name, width = 1000, height = 1000)
      plot(x = hc.cluster, labels =  row.names(hc.cluster), cex = 0.5)
      rect.hclust(tree = hc.cluster, k = ncluster, which = 1:ncluster, border = 1:ncluster, cluster = hc.class)
      dev.off()
    }
    #add data to LVI
    clus.shp@data$CLUSTER <- hc.class
  }

  #output file
  output.name <- paste0(output_folder,"/LVI_",ncluster,"_clusters.shp")
  shapefile(clus.shp,output.name,overwrite=T)

  #end
  print("****LVIclust end sucessfully*****")
  return(clus.shp)
}
FSantosCodes/GWRFC documentation built on Sept. 24, 2023, 6:07 a.m.