library(tidyverse)
library(parallel)
library(stringr)
library(here)
library(sf)
library(viridis)
library(rnaturalearth)
library(spgwr)
library(spdep)
knitr::opts_chunk$set(echo = TRUE)

Load tables

version <- "20210317"
cluster_type <- "hcluster"
derived_data_path <- here::here('analysis', 'data', 'derived_data', version)
derived_tables_path <- glue::glue(derived_data_path, '/tables')

# Mine clusters 
properties_clusters <- glue::glue(derived_tables_path, "/mine_properties_", cluster_type, ".geojson") %>% 
  sf::st_read(stringsAsFactors = FALSE) 
polygons_clusters <- glue::glue(derived_tables_path, "/mine_polygons_", cluster_type, ".geojson") %>% 
  sf::st_read(stringsAsFactors = FALSE) 

# Mine extraction 
production_clusters <- glue::glue(derived_tables_path, "/mine_production_", cluster_type, ".csv") %>% 
  readr::read_csv()
ore_processed_clusters <- glue::glue(derived_tables_path, "/mine_ore_processed_", cluster_type, ".csv") %>%
  readr::read_csv()

# Area intensity 
mine_area_intensity <- glue::glue(derived_tables_path, "/mine_area_intensity_",cluster_type,".csv") %>% 
  readr::read_csv()

Mining properties X Development stage

properties_clusters %>%
  sf::st_drop_geometry() %>% 
  dplyr::group_by(development_group) %>% 
  dplyr::summarise(n = n()) %>% 
  dplyr::mutate(perc = n/sum(n)*100) 

Note: there is a large number of pre-operational properties, 73% of the properties.

Polygons cluster X Development stage

polygons_clusters %>% 
  sf::st_drop_geometry() %>%
  tibble::as_tibble() %>% 
  dplyr::left_join(sf::st_drop_geometry(properties_clusters), by = c("hcluster_id" = "hcluster_id")) %>% 
  dplyr::mutate(development_group = ifelse(is.na(development_group), "Unknown", development_group)) %>% 
  dplyr::group_by(development_group) %>% 
  dplyr::summarise(n = n(), area = sum(area)) %>% 
  dplyr::mutate(n.perc = n/sum(n)*100, area.perc = area/sum(area)*100)

Polygons cluster X missing commodity

polygons_clusters %>% 
  sf::st_drop_geometry() %>%
  tibble::as_tibble() %>% 
  dplyr::left_join(sf::st_drop_geometry(properties_clusters), by = c("hcluster_id" = "hcluster_id")) %>% 
  dplyr::mutate(commodities_list = ifelse(is.na(commodities_list), "Unknown", 
                                   ifelse(stringr::str_detect(commodities_list, ","), "Companion", "Single host"))) %>% 
  dplyr::group_by(commodities_list) %>% 
  dplyr::summarise(n = n(), area = sum(area)) %>% 
  dplyr::mutate(n.perc = n/sum(n)*100, area.perc = area/sum(area)*100)

Notes: - 240 (8.44%) from the polygons clusters identified in the satellite images are tagged pre-operational near to mining properties. This may indicate out-of-date information in the mining properties database. - 309 (10.8%) of the polygons clusters were not linked to any mining property. In terms of area only 2,654 mk^2 (4.63%) were not linked to any mining property. - 115 (4.05%) from polygons clusters are closed. In terms of area only closed mines accounts only for 1.09%. This could indicate a bias in the mapping as closed area may have vegetation regeneration, which make identification more difficult. - 89% of the polygons area is operational. Meaning that this areas may still expand in the near future and could be monitored from satellite.

Polygons cluster X commodities

polygons_clusters %>% 
  sf::st_drop_geometry() %>%
  tibble::as_tibble() %>% 
  dplyr::left_join(sf::st_drop_geometry(properties_clusters), by = c("hcluster_id" = "hcluster_id")) %>% 
  dplyr::mutate(commodities_list = ifelse(is.na(commodities_list), "Unknown", commodities_list)) %>% 
  dplyr::group_by(commodities_list) %>% 
  dplyr::summarise(n = n(), area = sum(area)) %>% 
  dplyr::mutate(n.perc = n/sum(n)*100, area.perc = area/sum(area)*100) %>% 
  dplyr::arrange(desc(area.perc)) %>% 
  dplyr::filter(cumsum(area.perc)<50)

Commodity X Area (Muiti-counting)

polygons_clusters %>% 
  sf::st_drop_geometry() %>%
  tibble::as_tibble() %>% 
  dplyr::left_join(sf::st_drop_geometry(properties_clusters), by = c("hcluster_id" = "hcluster_id")) %>% 
  dplyr::filter(!is.na(commodities_list)) %>% 
  dplyr::mutate(companionality = factor(stringr::str_detect(commodities_list, ","), c(T, F), c("Companion", "Single host"))) %>% 
  tidyr::separate_rows(commodities_list, sep = ",") %>% 
  dplyr::filter(commodities_list != "NA") %>% 
  group_by(commodities_list, companionality) %>% 
  dplyr::summarise(area = sum(area), .groups = 'drop') %>% 
  dplyr::group_by(commodities_list) %>% 
  dplyr::mutate(companionality_sum = sum(area)) %>%
  dplyr::ungroup() %>% 
  dplyr::arrange(dplyr::desc(companionality_sum), companionality) %>% 
  dplyr::filter(0.99 >= cumsum(area)/sum(area)) %>%
  dplyr::arrange(dplyr::desc(area)) %>% 
  ggplot(aes(x = reorder(commodities_list, companionality_sum), y = area, fill = companionality)) +
    geom_bar(stat = 'identity', position = 'stack') +
    coord_flip() +
    viridis::scale_fill_viridis(discrete = T, option = "E", begin = 0.8, end = 0.3) +
    scale_y_continuous(labels = scales::unit_format(unit = "k", scale = 1e-3, accuracy = 1)) +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    labs(title = 'Commodities accounting for 99 % of the mining area',
       x = 'Commodity',
       y = 'Area [sqkm]') +
   guides(fill = guide_legend(title="Spatial cluster type"))

Note: Gold is extracted in most of the mapped areas

Commodity (production-allocated) X Area

production_clusters %>% 
  dplyr::filter(2000 <= year, year <= 2019) %>% 
  dplyr::group_by(hcluster_id, commodity_name) %>% 
  dplyr::summarise(quantity = sum(quantity, na.rm = TRUE), quantity_unit = unique(quantity_unit), .groups = 'drop') %>% 
  dplyr::right_join(sf::st_drop_geometry(polygons_clusters), by = c("hcluster_id" = "hcluster_id")) %>% 
  dplyr::group_by(hcluster_id) %>% 
  dplyr::filter(commodity_name != "NA") %>% 
  dplyr::mutate(area = quantity/sum(quantity, na.rm = TRUE) * sum(area, na.rm = TRUE)) %>%
  dplyr::ungroup() %>% 
  dplyr::group_by(commodity_name) %>% 
  dplyr::summarise(area = sum(area, na.rm = TRUE), .groups = 'drop') %>% 
  dplyr::arrange(dplyr::desc(area)) %>% 
  dplyr::arrange(dplyr::desc(area)) %>% 
  ggplot(aes(x = reorder(commodity_name, area), y = area)) +
    geom_bar(stat = 'identity', position = 'stack') +
    coord_flip() +
    viridis::scale_fill_viridis(discrete = T, option = "E", begin = 0.8, end = 0.3) +
    scale_y_continuous(labels = scales::unit_format(unit = "k", scale = 1e-3, accuracy = 1)) +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    labs(title = 'Mining area X Commodities',
       x = 'Commodity',
       y = 'Area [sqkm]') +
   guides(fill = guide_legend(title="Spatial cluster type"))

Note Allocated per production Coal is extracted in most of the mapped area, followed by Copper and Iron ore. Gold is only the fourth.

Commodity (production-allocated-split-companion) X Area

mine_area_intensity %>% 
  dplyr::group_by(commodity_name, companion) %>% 
  dplyr::summarise(area = sum(area, na.rm = TRUE), .groups = 'drop') %>% 
  dplyr::arrange(dplyr::desc(area)) %>% 
  ggplot(aes(x = reorder(commodity_name, area), y = area, fill = companion)) +
    geom_bar(stat = 'identity', position = 'stack') +
    coord_flip() +
    viridis::scale_fill_viridis(discrete = T, option = "E", begin = 0.8, end = 0.3) +
    scale_y_continuous(labels = scales::unit_format(unit = "k", scale = 1e-3, accuracy = 1)) +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    labs(title = 'Mining area allocated to the commodities using production mass',
       x = 'Commodity',
       y = 'Area [sqkm]') +
   guides(fill = guide_legend(title="Spatial cluster type"))

Note This graph shows the shares of the area where a commodity is extracted as single host and the share of area where it is extracted with companions.

Land intensity table

area_intensity_clusters %>% 
  ggplot(aes(x = quantity, y = area)) + 
  geom_point(size = 0.3) + 
  geom_smooth(method="lm") +
  facet_wrap(~commodity_name, scales = "free")

Scarter plot production x area

area_intensity_clusters %>% 
  ggplot(aes(x = quantity, y = area)) + 
  geom_point(size = 0.3) + 
  geom_smooth(method="lm") +
  facet_wrap(~commodity_name, scales = "free")

Note: Several commodities show a nearly horizontal "flat" regression line, i.e. the same size of area can have very different reported production. This indicate that the same mineral could have different impacts in differences regions.

Mining land use per biome

mine_polygons %>% 
  sf::st_drop_geometry() %>% 
  tibble::as_tibble() %>% 
  dplyr::group_by(biome_name) %>% 
  dplyr::summarise(area = sum(area, na.rm = TRUE), .groups = 'drop') %>% 
  dplyr::arrange(dplyr::desc(area)) %>% 
  ggplot(aes(x = reorder(biome_name, area), y = area)) +
    geom_bar(stat = 'identity', position = 'stack') +
    coord_flip() +
    viridis::scale_fill_viridis(discrete = T, option = "E", begin = 0.8, end = 0.3) +
    scale_y_continuous(labels = scales::unit_format(unit = "k", scale = 1e-3, accuracy = 1)) +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    labs(title = 'Mining area per Biome',
       x = 'Commodity',
       y = 'Area [sqkm]') +
   guides(fill = guide_legend(title="Spatial cluster type"))

TODO

mine_area_intensity %>% 
  dplyr::group_by(biome_name, commodity_name) %>% 
  dplyr::summarise(area = sum(area, na.rm = TRUE),
                   quantity = sum(area, na.rm = TRUE), .groups = 'drop') %>% 
  dplyr::mutate(area_intensity = area / quantity) %>% 
  dplyr::filter(area > 1000) %>% 
  dplyr::arrange(dplyr::desc(area_intensity)) %>% 
  ggplot(aes(x = reorder(biome_name, area_intensity), y = area_intensity)) +
    geom_bar(stat = 'identity', position = 'stack') +
    coord_flip() +
    facet_wrap(~commodity_name, scales = "free") + 
    viridis::scale_fill_viridis(discrete = T, option = "E", begin = 0.8, end = 0.3) +
    scale_y_continuous(labels = scales::unit_format(unit = "k", scale = 1e-3, accuracy = 1)) +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    labs(title = 'Mining area intensity per Biome',
       x = 'Commodity',
       y = 'Area [sqkm]') +
   guides(fill = guide_legend(title="Spatial cluster type"))
lmodel <- lm(area ~ quantity, data = mine_area_intensity)
summary(lmodel)

mine_ols <- glm(area ~ quantity, data = mine_area_intensity) 
summary(mine_ols)

mine_sp <- polygons_clusters %>% 
  dplyr::select(hcluster_id) %>% 
  dplyr::mutate(geometry = sf::st_centroid(geometry)) %>% 
  dplyr::right_join(mine_area_intensity, by = c("hcluster_id" = "hcluster_id")) %>%
  dplyr::group_by(hcluster_id) %>%
  dplyr::filter(commodity_name =="Coal") %>% 
  dplyr::summarise(quantity = sum(quantity, na.rm = TRUE), area = sum(area, na.rm = TRUE)) %>% 
  dplyr::filter(!is.na(area), !is.na(quantity)) %>% 
  dplyr::mutate(geometry = sf::st_centroid(geometry))  

mine_sp %>%
  tidyr::pivot_longer(cols = c(quantity, area)) %>% 
  ggplot() +
  geom_histogram(aes(x=value)) +
  facet_wrap(~name, ncol = 2)

mine_sp %>%
  tidyr::pivot_longer(cols = c(quantity, area)) %>% 
  ggplot() +
  geom_histogram(aes(x=log10(value))) +
  facet_wrap(~name, ncol = 2)

cor(mine_sp$area, mine_sp$quantity)

mine_gwrb1 <- spgwr::gwr.sel(area ~ quantity, data = sf::as_Spatial(mine_sp))
mine_gwrb1
mine_gwr <- spgwr::gwr(area ~ quantity, data = sf::as_Spatial(mine_sp), 
                       bandwidth = mine_gwrb1, se.fit = TRUE, hatmatrix = TRUE)
mine_gwr
BFC02.gwr.test(mine_gwr)
BFC99.gwr.test(mine_gwr)
LMZ.F1GWR.test(mine_gwr)
LMZ.F2GWR.test(mine_gwr)

TODO:

Cluster

Optmize cluster (loop over different distance threshhold) record the area wthout commodity and the area of conpanion

Can this function be optimized as an objective function?????

Spatial analysis

GWR x OLS per commodity

How to identify differences between biomes???? Add a dummy????



fineprint-global/mininglucc documentation built on Jan. 18, 2022, 9:01 p.m.