library(pangaear) library(tidyverse) library(stringr) library(here) library(sf) library(parallel) library(fastcluster) library(dbscan) library(progress) library(purrr) library(lwgeom) library(glue) library(measurements) library(mco) knitr::opts_chunk$set(echo = TRUE)
# Version stamp used to namespace the derived outputs, plus the project
# directory layout. Directories are created up front if missing.
version <- "202201170917"
raw_data_path <- here::here('analysis', 'data', 'raw_data')
derived_data_path <- here::here('analysis', 'data', 'derived_data', version)
derived_tables_path <- glue::glue(derived_data_path, '/tables')

# Ensure every target directory exists before anything is written.
for (path in c(raw_data_path, derived_data_path, derived_tables_path)) {
  dir.create(path, showWarnings = FALSE, recursive = TRUE)
}
# Fetch the RESOLVE Ecoregions2017 archive once (it is large, so the
# download timeout is raised temporarily and then restored), then load
# the shapefile, keeping only biome/ecoregion identifiers.
ecoregions_zip <- glue::glue(raw_data_path, "/Ecoregions2017.zip")
if (!file.exists(ecoregions_zip)) {
  previous_timeout <- getOption('timeout')
  options(timeout = 600)
  download.file(
    url = "https://storage.googleapis.com/teow2016/Ecoregions2017.zip",
    destfile = ecoregions_zip
  )
  options(timeout = previous_timeout)
  unzip(zipfile = ecoregions_zip, overwrite = TRUE, exdir = raw_data_path)
}

# st_make_valid repairs any invalid polygon geometries in the source layer.
ecoregions <- glue::glue(raw_data_path, "/Ecoregions2017.shp") |>
  sf::st_read() |>
  dplyr::transmute(
    biome_name = BIOME_NAME,
    biome_code = BIOME_NUM,
    eco_name = ECO_NAME,
    eco_code = ECO_ID
  ) |>
  sf::st_make_valid()
For mining areas we use the mining polygons data set available from 10.1594/PANGAEA.910894. This data set is described in 10.1038/s41597-020-00624-w. The polygons cover the entire globe.
# The v1 polygons could be fetched directly from PANGAEA as below; the
# v2 geopackage is instead read from the derived data folder.
# pg_record <- pangaear::pg_data(doi = "10.1594/PANGAEA.910894", overwrite = TRUE, mssgs = TRUE)
# unzip(zipfile = pg_record[[1]]$path,
#       files = "global_mining_polygons_v1.gpkg",
#       overwrite = TRUE,
#       exdir = raw_data_path)

# Load the global mining polygons (v2) and standardise the columns.
# Feature ids become 20-character zero-padded hexadecimal strings.
mining_polygons <- glue::glue(derived_data_path, "/global_mining_polygons_v2.gpkg") |>
  sf::st_read(fid_column_name = "fid") |>
  dplyr::transmute(
    id = stringr::str_pad(as.hexmode(fid), width = 20, pad = "0"),
    area = AREA,
    country_name = COUNTRY_NAME,
    country_isoa3 = ISO3_CODE
  ) |>
  dplyr::rename(geometry = geom)
# Attach each mining polygon to its nearest ecoregion. The spatial join
# is expensive, so the result is cached as GeoJSON; on a cache miss the
# join runs in parallel over chunks of roughly one polygon per core.
file_path <- glue::glue(derived_tables_path, "/mining_polygons_ecoregions.geojson")
if (file.exists(file_path)) {
  mining_polygons_ecoregions <- sf::st_read(
    file_path,
    stringsAsFactors = FALSE,
    fid_column_name = "id"
  )
} else {
  n_cores <- parallel::detectCores()
  n_rows <- nrow(mining_polygons)
  system.time(
    mining_polygons_ecoregions <- mining_polygons %>%
      # Partition the rows into ceiling(n_rows / n_cores) chunks of size n_cores.
      split(., rep(1:ceiling(n_rows / n_cores), each = n_cores, length.out = n_rows)) %>%
      parallel::mclapply(
        mc.cores = n_cores,
        FUN = sf::st_join,
        left = TRUE,
        y = ecoregions,
        join = sf::st_nearest_feature
      ) %>%
      dplyr::bind_rows()
  )
  sf::st_write(mining_polygons_ecoregions, file_path)
}
In this step we calculate the distances among all mining features in meters. The results are saved to the output directory, one file per country.
# Build a single feature table that combines the SNL mine properties
# (points) with the mining polygons, then compute per-country distance
# matrices between all features:
# - mine_properties: points joined to an ISO-A3 country concordance; ids
#   are 20-character zero-padded hex versions of the SNL ids.
# - mine_features: points and polygons stacked, with fresh row-number ids.
# - The combined layer is written to mine_features.geojson, split by
#   country, and fed to mininglucc::calc_dist_matrix (project package),
#   which writes the distance matrices under derived_data_path.
# NOTE(review): calc_dist_matrix is not visible here — presumably it
# writes one .rds per country into a dist_matrix/ subfolder (the code in
# the clustering step reads that layout); confirm against the package.
mine_properties <- glue::glue(raw_data_path, "/mining_commodities.gpkg") %>% sf::st_read(stringsAsFactors = FALSE) %>% dplyr::left_join(readr::read_csv(glue::glue(raw_data_path, "/snl_isoa3_country_tbl.csv")), by = c("country" = "snl_country")) %>% dplyr::mutate(id = str_pad(as.hexmode(snl_id), pad = "0", 20)) %>% dplyr::select(id, mine_name = mine, known_as, commodities_list = list_of_commodities, development_stage, operating_status, country_name = COUNTRY_NAME, country_isoa3 = ISO3_CODE, geometry = geom) mine_polygons <- mining_polygons_ecoregions %>% dplyr::transmute(dataset_id = id, dataset_name = "polygons", country_isoa3 = country_isoa3) mine_points <- mine_properties %>% dplyr::transmute(dataset_id = id, dataset_name = "points", country_isoa3 = country_isoa3) mine_features <- dplyr::bind_rows(mine_points, mine_polygons) %>% dplyr::mutate(id = str_pad(dplyr::row_number(), pad = "0", 20)) sf::st_write(mine_features, glue::glue(derived_data_path, "/mine_features.geojson"), delete_dsn = TRUE) mine_split <- split(mine_features, mine_features$country_isoa3) pb <- progress::progress_bar$new(total = length(mine_split)) system.time(dist_matrices <- purrr::map_dfr( .x = mine_split, .f = mininglucc::calc_dist_matrix, split_att = "country_isoa3", output_dir = derived_data_path, pb = pb) )
Below we cluster mining features based on the distance matrix for each country. We set the cutting distance to 10 km; see ?fastcluster::hclust and ?stats::cutree. Features further than 10 km from each other are related only if other features connect them; see 10.1038/s41597-020-00624-w. The DBSCAN algorithm works better for clustering mining points and polygons because of the heterogeneity of the data. I set minPts = 1 to allow for clusters composed of a single feature. The argument eps was set to 20000, so that the algorithm is flexible enough to cluster distant mining features. Even so, in many cases the clusters remain small. The results were visually checked. (Note: the code below derives eps from h = 10000 m; confirm whether 10 km or 20 km is the intended value.)
# Cluster mine features per country using (a) single-linkage hierarchical
# clustering cut at distance h and (b) DBSCAN with eps = h, both on the
# precomputed distance matrices. Countries without a distance matrix
# (a single feature) fall back to cluster id 1. Cluster ids are then
# re-keyed so they are globally unique across countries.
h <- units::set_units(10000, m)

dist_files <- dir(glue::glue(derived_data_path, "/dist_matrix"), full.names = TRUE)
# FIX: escape the dot and anchor the pattern. The previous
# str_remove_all(..., ".rds") treated "." as a regex wildcard and removed
# any 4-character run ending in "rds" anywhere in the file name.
names(dist_files) <- stringr::str_remove(basename(dist_files), "\\.rds$")

mine_clusters <- parallel::mclapply(names(mine_split), function(f){
  # Defaults used when the country has no distance matrix.
  hcluster <- 1
  dbcluster <- 1
  if(f %in% names(dist_files)){
    dist_matrix <- readRDS(dist_files[[f]])
    hcluster <- fastcluster::hclust(dist_matrix, method = "single") %>%
      cutree(h = as.numeric(units::set_units(h, m)))
    dbcluster <- dbscan::dbscan(dist_matrix, eps = as.numeric(h), minPts = 1)$cluster
  }
  mine_split[[f]] %>%
    sf::st_drop_geometry() %>%
    tibble::as_tibble() %>%
    dplyr::transmute(
      hcluster_id = hcluster,
      dbcluster_id = dbcluster,
      dataset_id = dataset_id,
      country_isoa3 = country_isoa3,
      dataset_name = dataset_name
    )
}) %>%
  dplyr::bind_rows() %>%
  # Replace per-country cluster ids with zero-padded global group ids.
  dplyr::group_by(country_isoa3, hcluster_id) %>%
  dplyr::mutate(hcluster_id = str_pad(cur_group_id(), pad = "0", 20)) %>%
  dplyr::ungroup() %>%
  dplyr::group_by(country_isoa3, dbcluster_id) %>%
  dplyr::mutate(dbcluster_id = str_pad(cur_group_id(), pad = "0", 20)) %>%
  dplyr::ungroup() %>%
  dplyr::select(hcluster_id, dbcluster_id, dataset_id, dataset_name)
# Persist the polygon and point feature tables together with their
# cluster assignments as GeoJSON layers.
mining_polygons_ecoregions %>%
  dplyr::select(
    id, country_isoa3, country_name, area,
    biome_name, biome_code, eco_name, eco_code
  ) %>%
  dplyr::left_join(
    dplyr::filter(mine_clusters, dataset_name == "polygons") %>%
      dplyr::select(-dataset_name),
    by = c("id" = "dataset_id")
  ) %>%
  sf::st_write(
    glue::glue(derived_tables_path, "/mine_polygons.geojson"),
    delete_dsn = TRUE
  )

mine_properties %>%
  dplyr::select(
    id, commodities_list, development_stage, operating_status,
    country_name, country_isoa3
  ) %>%
  dplyr::left_join(
    dplyr::filter(mine_clusters, dataset_name == "points") %>%
      dplyr::select(-dataset_name),
    by = c("id" = "dataset_id")
  ) %>%
  sf::st_write(
    glue::glue(derived_tables_path, "/mine_properties.geojson"),
    delete_dsn = TRUE
  )
# Build the mine production table from the SNL "*_production.Rdata" files
# (each file defines a data frame named `x`), normalising all quantities
# to metric tons, then build the ore-processed table from Ore.Rdata.
glue::glue(raw_data_path, "/mine_properties") %>%
  # FIX: escape the dot and anchor the pattern; previously "." was a
  # regex wildcard, so names like "x_productionXRdata" would also match.
  dir(pattern = "_production\\.Rdata$", full.names = TRUE) %>%
  lapply(function(f){
    load(f)  # loads the data frame `x`
    tibble::as_tibble(x) %>%
      dplyr::rename(
        id = snl_id,
        commodity_name = commodity,
        quantity = value,
        quantity_unit = unit
      ) %>%
      dplyr::filter(!is.na(quantity)) %>%
      dplyr::mutate(
        # Map SNL unit labels onto the names measurements::conv_unit expects.
        quantity_unit = ifelse(quantity_unit == "ct", "carat", quantity_unit),
        quantity_unit = ifelse(quantity_unit == "tonne", "metric_ton", quantity_unit),
        quantity_unit = ifelse(quantity_unit == "lb", "lbs", quantity_unit),
        # map2_dbl already returns a double vector; the old trailing
        # unlist() was redundant and has been removed.
        quantity = purrr::map2_dbl(
          .x = quantity,
          .y = quantity_unit,
          .f = measurements::conv_unit,
          to = "metric_ton"
        ),
        quantity_unit = "metric_ton"
      ) %>%
      dplyr::mutate(id = str_pad(as.hexmode(id), pad = "0", 20))
  }) %>%
  dplyr::bind_rows() %>%
  readr::write_csv(glue::glue(derived_tables_path, "/mine_production.csv"))

# FIX: use the configured raw_data_path instead of the hard-coded
# relative path "data/raw_data/...", so the script is independent of the
# working directory, consistent with the rest of the file.
load(glue::glue(raw_data_path, "/mine_properties/Ore.Rdata"))  # loads `Ore`
Ore %>%
  dplyr::select(id = snl_id, year, quantity = value, quantity_unit = unit) %>%
  dplyr::filter(!is.na(quantity)) %>%
  dplyr::mutate(id = str_pad(as.hexmode(id), pad = "0", 20)) %>%
  readr::write_csv(glue::glue(derived_tables_path, "/mine_ore_processed.csv"))
# Map the 14 WWF biomes onto the coarser biome groups used for reporting.
tibble::tribble(
  ~biome_group,             ~biome_name,
  "Boreal Forests/Taiga",   "Boreal Forests/Taiga",
  "Other",                  "Deserts & Xeric Shrublands",
  "Other",                  "Flooded Grasslands & Savannas",
  "Other",                  "Mangroves",
  "Mediterranean",          "Mediterranean Forests, Woodlands & Scrub",
  "Other",                  "Montane Grasslands & Shrublands",
  "Temperate",              "Temperate Broadleaf & Mixed Forests",
  "Temperate",              "Temperate Conifer Forests",
  "Temperate",              "Temperate Grasslands, Savannas & Shrublands",
  "Tropical & Subtropical", "Tropical & Subtropical Coniferous Forests",
  "Tropical & Subtropical", "Tropical & Subtropical Dry Broadleaf Forests",
  "Tropical & Subtropical", "Tropical & Subtropical Grasslands, Savannas & Shrublands",
  "Tropical & Subtropical", "Tropical & Subtropical Moist Broadleaf Forests",
  "Other",                  "Tundra"
) %>%
  readr::write_csv(glue::glue(derived_tables_path, "/biome_groups.csv"))
Commodity groups according to 10.1038/s41467-020-17928-5.
# Map SNL development stages onto three operational groups.
development_groups <- tibble::tribble(
  ~development_group, ~development_stage,
  "Operational",      "Preproduction",
  "Operational",      "Construction Planned",
  "Operational",      "Construction Started",
  "Operational",      "Commissioning",
  "Operational",      "Operating",
  "Operational",      "Satellite",
  "Operational",      "Expansion",
  "Operational",      "Limited Production",
  "Operational",      "Residual Production",
  "Pre-operational",  "Grassroots",
  "Pre-operational",  "Exploration",
  "Pre-operational",  "Target Outline",
  "Pre-operational",  "Reserves Development",
  "Pre-operational",  "Advanced Exploration",
  "Pre-operational",  "Prefeas/Scoping",
  "Pre-operational",  "Feasibility",
  "Pre-operational",  "Feasibility Started",
  "Pre-operational",  "Feasibility Complete",
  "Closed",           "Closed"
)
readr::write_csv(
  development_groups,
  glue::glue(derived_tables_path, "/development_groups.csv")
)
# Assemble cluster-level tables from the per-feature outputs written
# above.
# FIX: in the flattened source, the section comments ("# Spatial layers",
# "# Mining attribute tables", "# Concordance and groups") commented out
# the rest of their physical line; comments are now on their own lines.

# Spatial layers
mine_properties <- sf::st_read(dsn = glue::glue(derived_tables_path, "/mine_properties.geojson"))
mine_polygons <- sf::st_read(dsn = glue::glue(derived_tables_path, "/mine_polygons.geojson"))
# Mining attribute tables
mine_production <- readr::read_csv(glue::glue(derived_tables_path, "/mine_production.csv"))
mine_ore_processed <- readr::read_csv(glue::glue(derived_tables_path, "/mine_ore_processed.csv"))
# Concordance and groups
development_groups <- readr::read_csv(glue::glue(derived_tables_path, "/development_groups.csv"))
biome_groups <- readr::read_csv(glue::glue(derived_tables_path, "/biome_groups.csv"))

# Collapse the unique values of a vector into one comma-separated string.
fun_collapse_groups <- function(x){
  x <- unlist(x)
  glue::glue_collapse(glue::glue("{unique(x)}"), sep = ",", width = Inf)
}

# Reduce a set of development groups to one label with priority
# Operational > Pre-operational > Closed; anything else is "Unknown".
# (The checks are case-sensitive, so "Pre-operational" does not match
# the capitalised "Operational" pattern.)
fun_adjust_operation_status <- function(x){
  x <- fun_collapse_groups(x)
  if(is.na(x)) return("Unknown")
  if(stringr::str_detect(x, "Operational")) return("Operational")
  if(stringr::str_detect(x, "Pre-operational")) return("Pre-operational")
  if(stringr::str_detect(x, "Closed")) return("Closed")
  return("Unknown")
}

# Summarise mine properties per hierarchical cluster.
mine_properties %>%
  tidyr::separate_rows(commodities_list, sep = ",") %>%
  dplyr::left_join(development_groups, by = c("development_stage" = "development_stage")) %>%
  dplyr::group_by(hcluster_id) %>%
  dplyr::summarise(
    country_isoa3 = unique(country_isoa3),
    country_name = unique(country_name),
    commodities_list = fun_collapse_groups(commodities_list),
    development_stage = fun_collapse_groups(development_stage),
    operating_status = fun_collapse_groups(operating_status),
    development_group = fun_adjust_operation_status(development_group),
    .groups = 'drop') %>%
  sf::st_write(glue::glue(derived_tables_path, "/mine_properties_hcluster.geojson"), delete_dsn = TRUE)

# Summarise mine properties per DBSCAN cluster.
# FIX: dropped the redundant purrr::map(fun_adjust_operation_status)
# step present only in this branch; summarise() already collapses
# development_group, and map() produced a list column that the GeoJSON
# writer cannot represent. Now consistent with the hcluster branch.
mine_properties %>%
  tidyr::separate_rows(commodities_list, sep = ",") %>%
  dplyr::left_join(development_groups, by = c("development_stage" = "development_stage")) %>%
  dplyr::group_by(dbcluster_id) %>%
  dplyr::summarise(
    country_isoa3 = unique(country_isoa3),
    country_name = unique(country_name),
    commodities_list = fun_collapse_groups(commodities_list),
    development_stage = fun_collapse_groups(development_stage),
    operating_status = fun_collapse_groups(operating_status),
    development_group = fun_adjust_operation_status(development_group),
    .groups = 'drop') %>%
  sf::st_write(glue::glue(derived_tables_path, "/mine_properties_dbcluster.geojson"), delete_dsn = TRUE)

# Summarise polygons per hierarchical cluster: take the biome/ecoregion
# of the largest member polygon and sum the member areas.
dplyr::group_by(mine_polygons, hcluster_id) %>%
  dplyr::summarise(
    country_isoa3 = unique(country_isoa3),
    country_name = unique(country_name),
    biome_name = biome_name[which.max(area)],
    biome_code = biome_code[which.max(area)],
    eco_name = eco_name[which.max(area)],
    eco_code = eco_code[which.max(area)],
    area = sum(area, na.rm = TRUE),
    .groups = 'drop') %>%
  dplyr::left_join(biome_groups, by = c("biome_name" = "biome_name")) %>%
  sf::st_write(glue::glue(derived_tables_path, "/mine_polygons_hcluster.geojson"), delete_dsn = TRUE)

# Same summary per DBSCAN cluster.
dplyr::group_by(mine_polygons, dbcluster_id) %>%
  dplyr::summarise(
    country_isoa3 = unique(country_isoa3),
    country_name = unique(country_name),
    biome_name = biome_name[which.max(area)],
    biome_code = biome_code[which.max(area)],
    eco_name = eco_name[which.max(area)],
    eco_code = eco_code[which.max(area)],
    area = sum(area, na.rm = TRUE),
    .groups = 'drop') %>%
  dplyr::left_join(biome_groups, by = c("biome_name" = "biome_name")) %>%
  sf::st_write(glue::glue(derived_tables_path, "/mine_polygons_dbcluster.geojson"), delete_dsn = TRUE)

# Aggregate production to hierarchical clusters (per commodity and year).
mine_properties %>%
  sf::st_drop_geometry() %>%
  tibble::as_tibble() %>%
  dplyr::select(id, hcluster_id) %>%
  dplyr::right_join(mine_production, by = c("id" = "id")) %>%
  dplyr::group_by(hcluster_id, commodity_name, year) %>%
  dplyr::summarise(
    quantity = sum(quantity, na.rm = TRUE),
    quantity_unit = unique(quantity_unit),
    .groups = 'drop') %>%
  readr::write_csv(glue::glue(derived_tables_path, "/mine_production_hcluster.csv"))
# Aggregate production to DBSCAN clusters, and processed ore to both
# cluster definitions, summing quantities per cluster and year.
mine_properties %>%
  sf::st_drop_geometry() %>%
  tibble::as_tibble() %>%
  dplyr::select(id, dbcluster_id) %>%
  dplyr::right_join(mine_production, by = c("id" = "id")) %>%
  dplyr::group_by(dbcluster_id, commodity_name, year) %>%
  dplyr::summarise(
    quantity = sum(quantity, na.rm = TRUE),
    quantity_unit = unique(quantity_unit),
    .groups = 'drop'
  ) %>%
  readr::write_csv(glue::glue(derived_tables_path, "/mine_production_dbcluster.csv"))

mine_properties %>%
  sf::st_drop_geometry() %>%
  tibble::as_tibble() %>%
  dplyr::select(id, hcluster_id) %>%
  dplyr::right_join(mine_ore_processed, by = c("id" = "id")) %>%
  dplyr::group_by(hcluster_id, year) %>%
  dplyr::summarise(
    quantity = sum(quantity, na.rm = TRUE),
    quantity_unit = unique(quantity_unit),
    .groups = 'drop'
  ) %>%
  readr::write_csv(glue::glue(derived_tables_path, "/mine_ore_processed_hcluster.csv"))

mine_properties %>%
  sf::st_drop_geometry() %>%
  tibble::as_tibble() %>%
  dplyr::select(id, dbcluster_id) %>%
  dplyr::right_join(mine_ore_processed, by = c("id" = "id")) %>%
  dplyr::group_by(dbcluster_id, year) %>%
  dplyr::summarise(
    quantity = sum(quantity, na.rm = TRUE),
    quantity_unit = unique(quantity_unit),
    .groups = 'drop'
  ) %>%
  readr::write_csv(glue::glue(derived_tables_path, "/mine_ore_processed_dbcluster.csv"))
# Allocate each cluster's mining area to its commodities in proportion to
# the mass produced in 2000-2019 and derive the per-commodity area
# intensity (area per ton). Clusters with several commodities are tagged
# "Companion", otherwise "Single host".
# FIX: in the flattened source the inline "# Mass of ..." / "# Allocate
# ..." comments swallowed the rest of their physical lines (the comment
# "...mass of each commodity" was even split across two lines, leaving
# bare tokens); comments are now on their own lines. Also replaced T/F
# with TRUE/FALSE.
mine_polygons_hcluster <- sf::st_read(glue::glue(derived_tables_path, "/mine_polygons_hcluster.geojson")) %>%
  sf::st_drop_geometry() %>%
  tibble::as_tibble()
mine_polygons_dbcluster <- sf::st_read(glue::glue(derived_tables_path, "/mine_polygons_dbcluster.geojson")) %>%
  sf::st_drop_geometry() %>%
  tibble::as_tibble()
mine_production_dbcluster <- readr::read_csv(glue::glue(derived_tables_path, "/mine_production_dbcluster.csv"))
mine_production_hcluster <- readr::read_csv(glue::glue(derived_tables_path, "/mine_production_hcluster.csv"))

mine_production_hcluster %>%
  dplyr::filter(2000 <= year, year <= 2019) %>%
  dplyr::group_by(hcluster_id, commodity_name) %>%
  # Mass of each commodity produced per cluster
  dplyr::summarise(
    quantity = sum(quantity, na.rm = TRUE),
    quantity_unit = unique(quantity_unit),
    .groups = 'drop'
  ) %>%
  dplyr::right_join(mine_polygons_hcluster, by = c("hcluster_id" = "hcluster_id")) %>%
  dplyr::filter(!is.na(commodity_name), !commodity_name %in% c("NA", "", "Na", "na")) %>%
  dplyr::group_by(hcluster_id) %>%
  # Allocate area proportionally to the mass of each commodity produced per cluster
  dplyr::mutate(
    area = quantity / sum(quantity, na.rm = TRUE) * sum(area, na.rm = TRUE),
    area_intensity = area / quantity,
    companion = factor(
      length(unique(commodity_name)) > 1,
      c(TRUE, FALSE),
      c("Companion", "Single host")
    )
  ) %>%
  dplyr::ungroup() %>%
  readr::write_csv(glue::glue(derived_tables_path, "/mine_area_intensity_hcluster.csv"))

mine_production_dbcluster %>%
  dplyr::filter(2000 <= year, year <= 2019) %>%
  dplyr::group_by(dbcluster_id, commodity_name) %>%
  # Mass of each commodity produced per cluster
  dplyr::summarise(
    quantity = sum(quantity, na.rm = TRUE),
    quantity_unit = unique(quantity_unit),
    .groups = 'drop'
  ) %>%
  dplyr::right_join(mine_polygons_dbcluster, by = c("dbcluster_id" = "dbcluster_id")) %>%
  dplyr::filter(!is.na(commodity_name), !commodity_name %in% c("NA", "", "Na", "na")) %>%
  dplyr::group_by(dbcluster_id) %>%
  # Allocate area proportionally to the mass of each commodity produced per cluster
  dplyr::mutate(
    area = quantity / sum(quantity, na.rm = TRUE) * sum(area, na.rm = TRUE),
    area_intensity = area / quantity,
    companion = factor(
      length(unique(commodity_name)) > 1,
      c(TRUE, FALSE),
      c("Companion", "Single host")
    )
  ) %>%
  dplyr::ungroup() %>%
  readr::write_csv(glue::glue(derived_tables_path, "/mine_area_intensity_dbcluster.csv"))
# --- Exploratory / scratch code: tuning the hierarchical cut distance ---
# fitness_hcluster(x) evaluates a candidate cutting distance x[1] by
# clustering one country's features and returning two objectives to
# minimise: (1) total area of "Companion" clusters (several commodities)
# and (2) area of "Unknown" clusters (no commodity information); the
# third objective (negative single-host area) is commented out.
# NOTE(review): `f` is hard-coded here to a user-specific BRA .rds path
# under a 20210317 version folder, overriding the global `f` exported to
# the workers below — confirm before rerunning.
fitness_hcluster <- function(x){ hcluster <- 1 fun_collapse_groups <- function(x){ x <- unlist(x) glue::glue_collapse(glue::glue("{unique(x)}"), sep = ",", width = Inf) } f = "/home/maus/workspace/mininglucc/analysis/data/derived_data/20210317/dist_matrix/BRA.rds" dist_matrix <- readRDS(dist_files[[f]]) hcluster <- fastcluster::hclust(dist_matrix, method = "single") %>% cutree(h = x[1]) cluster_ids <- mine_split[[f]] %>% sf::st_drop_geometry() %>% tibble::as_tibble() %>% dplyr::transmute(hcluster_id = hcluster, dataset_id = dataset_id, country_isoa3 = country_isoa3, dataset_name = dataset_name) mine_properties_hcluster <- mine_commodities %>% dplyr::filter(country_isoa3 == f) %>% dplyr::left_join(dplyr::filter(cluster_ids, dataset_name == "points"), by = c("id" = "dataset_id")) %>% tidyr::separate_rows(commodities_list, sep = ",") %>% dplyr::group_by(hcluster_id) %>% dplyr::summarise(commodities_list = fun_collapse_groups(commodities_list), .groups = 'drop') %>% dplyr::transmute(cluster_id = hcluster_id, commodities_list = commodities_list) mine_polygons_hcluster <- mine_polygons %>% dplyr::filter(country_isoa3 == f) %>% dplyr::left_join(dplyr::filter(cluster_ids, dataset_name == "polygons"), by = c("id" = "dataset_id")) %>% dplyr::group_by(hcluster_id) %>% dplyr::summarise(area = sum(area, na.rm = TRUE), .groups = 'drop') res_hcluster_aux <- mine_properties_hcluster %>% tidyr::separate_rows(commodities_list, sep = ",") %>% dplyr::right_join(mine_polygons_hcluster, by = c("cluster_id" = "hcluster_id")) %>% dplyr::group_by(cluster_id) %>% dplyr::summarise(n = length(na.omit(commodities_list)), area = sum(area, na.rm = TRUE)) %>% dplyr::mutate(commodities_list = ifelse(n == 0, "Unknown", ifelse(n == 1, "Single host", "Companion"))) res_hcluster <- res_hcluster_aux %>% dplyr::group_by(commodities_list) %>% dplyr::summarise(n = dplyr::n(), area = sum(area)) %>% dplyr::mutate(n.perc = n/sum(n)*100, area.perc = area/sum(area)*100) companion <- 
# (continued) companion/unknown areas feed the objective vector; note the
# "unkown" spelling is used consistently in this chunk.
# NOTE(review): in this flattened source, text after each `#` comments
# out the remainder of its physical line (e.g. `return(y) }` after
# `#y[3] <- -single_host`), so these lines must be reflowed before the
# chunk can run again.
dplyr::filter(res_hcluster_aux, commodities_list == "Companion") %>% dplyr::mutate(n * area) %>% dplyr::summarise(area = sum(area, na.rm = TRUE)) %>% .$area unkown <- dplyr::filter(res_hcluster, commodities_list == "Unknown") %>% .$area single_host <- dplyr::filter(res_hcluster, commodities_list == "Single host") %>% .$area y <- numeric(2) y[1] <- companion y[2] <- unkown #y[3] <- -single_host return(y) } get.elbow.points.indices <- function(x, y, threshold) { ids <- order(x) x <- x[ids] y <- y[ids] d1 <- diff(y) / diff(x) # first derivative d2 <- diff(d1) / diff(x[-1]) # second derivative # plot(d2) # indices <- which.max(abs(d2)) + 2 # indices <- which(abs(d1)>threshold) + 1 indices <- which.max(abs(d1)) + 1 #d1 <- diff(y[indices]) / diff(x[indices]) # first derivative #d2 <- diff(d1) / diff(x[indices][-1]) # second derivative # indices <- which(abs(d2) > threshold) return(ids[indices]) } mine_commodities <- mine_properties %>% dplyr::select(id, country_isoa3, commodities_list) %>% sf::st_drop_geometry() %>% tibble::as_tibble() mine_polygons <- mining_polygons_ecoregions %>% sf::st_drop_geometry() %>% tibble::as_tibble() %>% dplyr::select(id, country_isoa3, area) dist_files <- dir(glue::glue(derived_data_path, "/dist_matrix"), full.names = TRUE) names(dist_files) <- stringr::str_remove_all(basename(dist_files), ".rds") # f <- "USA" # usar2 <- mco::nsga2(fn = fitness_hcluster, idim = 1, odim = 2, generations = 2, popsize = 100, # lower.bounds = c(1000), upper.bounds = c(15000)) # # f <- "BRA" # bra2 <- mco::nsga2(fn = fitness_hcluster, idim = 1, odim = 2, generations = 2, popsize = 100, # lower.bounds = c(1000), upper.bounds = c(15000)) # f <- "ZAF" # zaf2 <- mco::nsga2(fn = fitness_hcluster, idim = 1, odim = 2, generations = 2, popsize = 100, # lower.bounds = c(1000), upper.bounds = c(15000)) library(parallelMap) library(ecr) parallelStartSocket(4) # start in socket mode and create 2 processes on localhost parallelExport("f", "mine_split", "dist_files", 
# get.elbow.points.indices() above locates a knee in a Pareto front via
# the maximum of the first derivative; the commented alternatives used
# the second derivative. Below, several optimisers (ecr, mco::nsga2,
# caRamel, nsga2R) are tried over h in [1000, 15000] m, with timing notes
# kept as comments. NOTE(review): "fun_collapse_groups" is exported
# twice to the workers; presumably one of the two should be a different
# symbol — verify.
"mine_commodities", "mine_polygons", "fun_collapse_groups", "fun_collapse_groups") parallelLibrary("magrittr") lower = c(1000) upper = c(15000) system.time( zaf2 <- ecr::ecr(fitness.fun = fitness_hcluster, n.dim = 1, n.objectives = 2, representation = "float", lower = lower, upper = upper, mu = 8, lambda = 8, mutator = setup(mutGauss, lower = lower, upper = upper)) ) parallelStop() # turn parallelization off again plot(zaf2$pareto.front) ## 50 x 2 # user system elapsed # 5.172 2.137 228.185 ## 8 x 8 # user system elapsed # 3.258 2.098 220.277 plot(zaf2$pareto.front) f <- "ZAF" system.time( zaf1 <- mco::nsga2(fn = fitness_hcluster, idim = 1, odim = 2, generations = 8, popsize = 8, lower.bounds = c(1000), upper.bounds = c(15000)) ) # user system elapsed # 166.422 10.235 180.288 library(caRamel) f <- "BRA" results <- caRamel::caRamel(nobj = 2, nvar = 1, minmax = c(FALSE, FALSE) , bounds = matrix(data = c(1000, 15000), nrow = 1, ncol = 2), func = fitness_hcluster, popsize = 1000, # size of the genetic population archsize = 1, # size of the archive for the Pareto front maxrun = 100, # maximum number of calls prec = matrix(100, nrow = 1, ncol = 2), # 100km2 carallel=FALSE, sensitivity=TRUE) # sensitivity required results r2 <- zaf1 aux <- r2$value %>% as.data.frame() %>% tibble::as_tibble() %>% dplyr::mutate(id = dplyr::row_number(), par = r2$par[,1]) %>% dplyr::group_by(V2) %>% dplyr::summarise(id = id[which.min(V1)], par = par[which.min(V1)], V1 = min(V1)) %>% dplyr::group_by(V1) %>% dplyr::summarise(id = id[which.min(V1)], par = par[which.min(V2)], V2 = min(V2)) r2$value <- r2$value[aux$id,] r2$par <- r2$par[aux$id,] r2$pareto.optimal <- r2$pareto.optimal[aux$id] #points(findCutoff(aux$V1, aux$V2, method = "curvature"), col = "green") ids <- get.elbow.points.indices(x=aux$V1, y=aux$V2, threshold = 0.1) plot(r2) points(aux$V1[ids], aux$V2[ids], col = "blue") points(aux$V1[18], aux$V2[18], col = "green") r2$par[ids] r2 <- bra2 plot(r2) lines(x,predict(loess(y ~ 
# Post-processing of optimiser results: dedupe the Pareto set, find the
# elbow, and inspect candidate cut distances with rPref/ggplot.
# NOTE(review): `x`, `y`, `df`, and `bra2` are not defined in this chunk
# (likely leftovers from an interactive session), so this section will
# not run as-is.
x)),type="l") ids <- order(x) KneeArrower:::findCutoffCurvature(x,predict(loess(y ~ x))) points(findCutoff(r2$value[ids,1], r2$value[ids,2], method = "curvature"), col = "green") ids <- get.elbow.points.indices(x=r2$value[,1], y=r2$value[,2], threshold = 1e4) points(r2$value[ids,1], r2$value[ids,2], col = "blue") paretoSet(r2) dominatedHypervolume(r2) f <- "USA" # debugonce(nsga2R) hcluster_optim <- nsga2R::nsga2R(fn = fitness_hcluster, varNo = 2, objDim = 2, generations = 10, popSize = 100, lowerBounds = c(1000, 1), upperBounds = c(15000, 1)) par_optim <- hcluster_optim$parameters[hcluster_optim$paretoFrontRank == 1, ] %>% as.data.frame() %>% tibble::as_tibble() %>% dplyr::transmute(h = V1) hcluster_optim$objectives[hcluster_optim$paretoFrontRank == 1, ] %>% as.data.frame() %>% dplyr::transmute(Single_host = -V2, Unkown = V1) %>% dplyr::bind_cols(par_optim) %>% dplyr::group_by(Single_host, Unkown) %>% dplyr::summarise(h = min(h), .groups = 'drop') %>% dplyr::arrange(desc(Single_host), Unkown) %>% dplyr::mutate(ds = c(NA, diff(Single_host)), du = c(NA, diff(Unkown)), r = du/ds) %>% round(2) %>% View() plot(hcluster_optim$objectives[,1], hcluster_optim$objectives[,2]) library(rPref) sky <- hcluster_optim$objectives %>% as.data.frame() %>% tibble::as_tibble() %>% dplyr::transmute(Unkown = V1, Single_host = V2, h = hcluster_optim$parameters[, 1], .level = hcluster_optim$paretoFrontRank) sky <- rPref::psel(df, low(Unkown) * low(Single_host)) ggplot(sky, aes(x = Unkown, y = Single_host)) + geom_point(shape = 21) + geom_point(data = sky, size = 3) + geom_step(data = sky, direction = "vh") p <- low(Unkown) * low(Single_host) res <- psel(df, p, top = nrow(df)) ggplot(res, aes(x = Unkown, y = Single_host, color = factor(.level))) + geom_point(size = 3) + geom_step(direction = "vh") + scale_color_viridis_d()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.