knitr::opts_chunk$set( fig.retina = 3, fig.align = "center", fig.width = 8.5, fig.asp = 0.66, out.width = "70%", echo = FALSE ) options( scipen = 999 ) library("drake") library("hsdar") library("dplyr") library("ggplot2") library("ggpubr") library("ggpmisc") library("patchwork") library("iml") # load drake objects loadd( # Slurm resources suggestion: at least 20 cores and 2 GB / core = 40 GB pro job fi_permut_hr, fi_permut_vi, fi_ale_hr, fi_ale_vi, fi_ale_hr_gs20, fi_ale_vi_gs20, df_wavelengths_from_indices, spec_sigs )
Preview the ordered feature importance results for datasets "HR" and "VI".
fi_ranked_hr <- fi_permut_hr$res %>% tibble::rownames_to_column("measure") %>% tidyr::pivot_longer( cols = starts_with("B"), values_to = "importance", names_to = "feature" ) %>% dplyr::mutate(wavelength = seq(420, 995, 4.75)) %>% dplyr::mutate(numeric_id = seq(5, 126, 1)) %>% dplyr::arrange(desc(importance)) %>% dplyr::mutate(rank = row_number()) %>% dplyr::select(-measure) fi_ranked_hr fi_ranked_vi <- fi_permut_vi$res %>% tibble::rownames_to_column("measure") %>% dplyr::select(-contains("ID..")) %>% tidyr::pivot_longer( cols = !measure, values_to = "importance", names_to = "feature" ) %>% # mutate(wavelength = seq(420, 995, 4.75)) %>% dplyr::arrange(desc(importance)) %>% dplyr::mutate(rank = row_number()) %>% dplyr::select(-measure) %>% dplyr::mutate(feature = stringr::str_replace(feature, "bf2_", "")) fi_ranked_vi
PROSAIL is a algorithm simulating Spectral Signature (mean)s of vegetation, see ?hsdar::PROSAIL
.
Reflectance is scaled to 0-10 to be able to plot it in the same plot as the feature importance rankings -> the axis limits for the y and z axis needs to match.
PROSAIL returns a Spectral Signature (mean) from 400 nm to 2500 nm -> we take the values only and subset to 400 nm - 1000 nm. Because we order from 1 - 10 with 1 being the best rank, we have to reverse the scaling of the reflectance values.
spectra_sim <- hsdar::PROSAIL() spectra_df <- data.frame( reflectance = as.vector(spectra_sim@spectra@spectra_ma), wavelength = seq(400, 2500, 1) ) %>% dplyr::filter(wavelength < 1000) %>% # scale the reflectance to [0, 2] to play nicely with the y-axis later (mean dec in rmse) dplyr::mutate(reflectance = scale(reflectance, center = FALSE, scale = max(reflectance, na.rm = TRUE) / 2 ))
Next we bind the simulated data with the feature importance rankings. To join both data.frames we need to round the reflectance centers of the bands to integers to match with the reflectance values created by PROSAIL.
# round the wavelengths of the HR dataset to match with the simulated ones fi_ranked_hr$wavelength <- round(fi_ranked_hr$wavelength) data_hr_merged <- dplyr::left_join(spectra_df, fi_ranked_hr, by = c("wavelength")) %>% dplyr::left_join(spec_sigs, by = c("wavelength")) %>% dplyr::mutate(class = "HR") %>% dplyr::mutate(reflectance = as.numeric(reflectance))
To label only a subset of the data, a custom data.frame is created.
df_wavelengths_from_indices_imp <- df_wavelengths_from_indices %>% dplyr::left_join(fi_ranked_vi, by = c("class" = "feature")) %>% na.omit() df_wavelengths_from_indices_label <- df_wavelengths_from_indices_imp %>% arrange(rank) %>% dplyr::group_by(class, rank) %>% dplyr::slice(1) %>% dplyr::ungroup() %>% dplyr::arrange(rank) %>% dplyr::slice(1:10) df_wavelengths_from_hr <- data_hr_merged %>% dplyr::arrange(rank) %>% dplyr::group_by(class, rank) %>% dplyr::slice(1) %>% dplyr::ungroup() %>% dplyr::arrange(rank) %>% dplyr::slice(1:10) %>% dplyr::select(-class) %>% dplyr::rename(class = feature) df_label_all <- df_wavelengths_from_indices_label %>% dplyr::bind_rows(df_wavelengths_from_hr)
plot_reflectance_imp_hr <- function(data) { ggplot(data, aes(x = .data[["wavelength"]], y = .data[["importance"]])) + geom_line( data = data_hr_merged[!is.na(data_hr_merged$laukiz1), ], aes( x = wavelength, y = laukiz1, color = "Spectral Signature (mean) of Laukiz1" ), na.rm = TRUE, linetype = "solid", size = 0.6 ) + geom_line( data = data_hr_merged[!is.na(data_hr_merged$laukiz2), ], aes( x = wavelength, y = laukiz2, color = "Spectral Signature (mean) of Laukiz2" ), na.rm = TRUE, linetype = "solid", size = 0.6 ) + geom_line( data = data_hr_merged[!is.na(data_hr_merged$luiando), ], aes( x = wavelength, y = luiando, color = "Spectral Signature (mean) of Luiando" ), na.rm = TRUE, linetype = "solid", size = 0.6 ) + geom_line( data = data_hr_merged[!is.na(data_hr_merged$oiartzun), ], aes( x = wavelength, y = oiartzun, color = "Spectral Signature (mean) of Oiartzun" ), na.rm = TRUE, linetype = "solid", size = 0.6 ) + # HR: plot only rank 11:last geom_point( data = data_hr_merged[which(!(data_hr_merged$rank %in% c(1:10))), ], aes(color = "HR: Rank 11:122"), size = 2.5, shape = 1, na.rm = TRUE ) + # HR: plot only rank 1:10 geom_point( data = data_hr_merged[which(data_hr_merged$rank %in% c(1:10)), ], aes(color = "HR: Rank 1:10"), shape = 20, size = 3, na.rm = TRUE ) + scale_color_manual(values = c( "HR: Rank 11:122" = "grey", "HR: Rank 1:10" = "black", "Spectral Signature (mean) of Laukiz1" = "#BC3C29", "Spectral Signature (mean) of Laukiz2" = "#0072B5", "Spectral Signature (mean) of Luiando" = "#E18727", "Spectral Signature (mean) of Oiartzun" = "#20854E" )) + scale_x_continuous(limits = c(400, 1000), breaks = scales::pretty_breaks()) + scale_y_continuous( sec.axis = sec_axis(~ scale(-., center = FALSE, scale = max(., na.rm = TRUE) ), labels = c(1.0, 0.75, 0.55, 0.25, 0), name = "Scaled Reflectance [0, 1]" ) ) + guides(color = guide_legend( title = NULL, override.aes = list( linetype = c("blank", "blank", "solid", "solid", "solid", "solid"), shape = c(20, 1, NA, NA, NA, NA), color = c("black", "grey", "#BC3C29", "#0072B5", "#E18727", "#20854E") ) )) + labs( title = "Permutation-based Variable Importance for dataset 'HR'", subtitle = paste0( "The ten most important features are labeled by their band number." ), caption = "Learner: SVM; 100 Monte-Carlo Iterations", y = "Mean Decrease in RMSE", x = "Wavelength [nm]" ) + ggrepel::geom_label_repel( # only label the best 10 features data = df_wavelengths_from_hr, label = df_wavelengths_from_hr$class, size = 4, na.rm = TRUE ) + ggpubr::theme_pubclean(base_size = 14) } plot_reflectance_imp_vi <- function(data) { ggplot(data, aes(x = .data[["wavelength"]], y = .data[["importance"]])) + geom_line( data = data_hr_merged[!is.na(data_hr_merged$laukiz1), ], aes( x = wavelength, y = laukiz1, color = "Spectral Signature (mean) of Laukiz1" ), na.rm = TRUE, linetype = "solid", size = 0.6 ) + geom_line( data = data_hr_merged[!is.na(data_hr_merged$laukiz2), ], aes( x = wavelength, y = laukiz2, color = "Spectral Signature (mean) of Laukiz2" ), na.rm = TRUE, linetype = "solid", size = 0.6 ) + geom_line( data = data_hr_merged[!is.na(data_hr_merged$luiando), ], aes( x = wavelength, y = luiando, color = "Spectral Signature (mean) of Luiando" ), na.rm = TRUE, linetype = "solid", size = 0.6 ) + geom_line( data = data_hr_merged[!is.na(data_hr_merged$oiartzun), ], aes( x = wavelength, y = oiartzun, color = "Spectral Signature (mean) of Oiartzun" ), na.rm = TRUE, linetype = "solid", size = 0.6 ) + # VI: rank 1:10 geom_point( data = df_wavelengths_from_indices_imp[which((df_wavelengths_from_indices_imp$rank %in% c(1:10))), ], aes(x = wavelength, y = importance, group = class), size = 3, shape = 20 ) + # VI: rank 1:10 geom_line( data = df_wavelengths_from_indices_imp[which((df_wavelengths_from_indices_imp$rank %in% c(1:10))), ], aes(x = wavelength, y = importance, group = class, color = "VI: Rank 1:10"), size = 0.2, linetype = "dashed" ) + # VI: rank 11:89 geom_point( data = df_wavelengths_from_indices_imp[which(!(df_wavelengths_from_indices_imp$rank %in% c(1:10))), ], aes(x = wavelength, y = importance, group = class), color = "grey", shape = 1, size = 2.5 ) + # VI: rank 11:89 geom_line( data = df_wavelengths_from_indices_imp[which(!(df_wavelengths_from_indices_imp$rank %in% c(1:10))), ], aes(x = wavelength, y = importance, group = class, color = "VI: Rank 11:89"), size = 0.2, linetype = "dashed" ) + scale_color_manual(values = c( "VI: Rank 11:89" = "grey", "VI: Rank 1:10" = "black", "Spectral Signature (mean) of Laukiz1" = "#BC3C29", "Spectral Signature (mean) of Laukiz2" = "#0072B5", "Spectral Signature (mean) of Luiando" = "#E18727", "Spectral Signature (mean) of Oiartzun" = "#20854E" )) + scale_x_continuous(limits = c(400, 1000), breaks = scales::pretty_breaks()) + scale_y_continuous( sec.axis = sec_axis(~ scale(-., center = FALSE, scale = max(., na.rm = TRUE) ), labels = c(1.0, 0.75, 0.55, 0.25, 0), name = "Scaled Reflectance [0, 1]" ) ) + guides(color = guide_legend( title = NULL, override.aes = list( linetype = c("solid", "solid", "solid", "solid", "dashed", "dashed"), shape = c(NA, NA, NA, NA, 20, 1), color = c("#BC3C29", "#0072B5", "#E18727", "#20854E", "black", "grey") ) )) + labs( title = "Permutation-based Variable Importance for dataset 'VI'", subtitle = paste0( "The ten most important features are labeled by their index name." ), caption = "Learner: SVM; 100 Monte-Carlo Iterations", y = "Mean Decrease in RMSE", x = "Wavelength [nm]" ) + ggrepel::geom_label_repel( # only label the best 10 features data = df_wavelengths_from_indices_label, label = df_wavelengths_from_indices_label$class, size = 4, na.rm = TRUE ) + ggpubr::theme_pubclean(base_size = 14) } plot_reflectance_imp_absolute <- function(data, x_identifier, class) { pl <- ggplot(data, aes(x = .data[[x_identifier]], y = .data[["importance"]])) + labs(y = "Importance", x = "Band number") + geom_segment(aes( x = .data[[x_identifier]], y = 0, xend = .data[[x_identifier]], yend = .data[["importance"]] ), color = "grey", show.legend = FALSE ) + geom_point(size = 1, color = "black", show.legend = T) + labs( title = glue::glue("Permutation-based Variable Importance for Dataset '{class}'"), subtitle = "Absolute importance values by band", caption = "Learner: SVM; 100 Monte-Carlo iterations" ) + ggpubr::theme_pubclean() if (is.character(data[[x_identifier]])) { pl } else { pl + scale_x_continuous(breaks = seq(5, 125, 5)) pl } }
p11 <- data_hr_merged %>% plot_reflectance_imp_hr() p11
p12 <- data_hr_merged %>% plot_reflectance_imp_vi() p12
p11 / p12
p2 <- fi_ranked_hr %>% plot_reflectance_imp_absolute("numeric_id", class = "HR") p2
p3 <- fi_ranked_vi %>% plot_reflectance_imp_absolute("feature", class = "VI") + ggpubr::rotate_x_text() p3
Vogelmann2 $(R_{734}-R_{747})/(R_{715}+R_{726})$ Vogelmann et al. (1993)
Vogelmann4 $(R_{734}-R_{747})/(R_{715}+R_{720})$ Vogelmann et al. (1993)
Vogelmann3 $D_{715}/D_{705}$ Vogelmann et al. (1993)
Vogelmann $R_{740}/R_{720}$ Vogelmann et al. (1993)
NPCI $(R_{680}-R_{430})/(R_{680}+R_{430})$
D2 $D_{705}/D_{722}$
Datt3 $D_{754}/D_{704}$
PWI $R_{900}/R_{970}$
SR7 $R_{440}/R_{690}$
SRPI $R_{430}/R_{680}$
Dxxx: First derivation of reflectance values at wavelength 'xxx'. Rxxx: Reflectance at wavelength 'xxx'.
Reference: ?hsdar::vegindex()
p2 / p3
ALE plots via package {iml}
Top ten HR features from permutation Vimp
ale_p1 = fi_ale_hr$plot( features = df_wavelengths_from_hr$class, ncol = 2 ) & scale_y_continuous( n.breaks = 2 ) & labs(y = "ALE") & theme_pubr(base_size = 14) & theme( legend.text = element_text(size = 13), legend.title = element_text(size = 13), axis.title.y = element_text(size = 8), axis.text.y = element_text(size = 9) ) # Custom y axis breaks for subplots ale_p1[[2]] = ale_p1[[2]] + scale_y_continuous( n.breaks = 3 ) ale_p1[[3]] = ale_p1[[3]] + scale_y_continuous( n.breaks = 4 ) ale_p1[[7]] = ale_p1[[7]] + scale_y_continuous( n.breaks = 4 ) ale_p1[[8]] = ale_p1[[8]] + scale_y_continuous( n.breaks = 4 ) ale_p1[[9]] = ale_p1[[9]] + scale_y_continuous( n.breaks = 4 ) ale_p1[[10]] = ale_p1[[10]] + scale_y_continuous( n.breaks = 4 ) ale_p1
Top ten HR features from permutation Vimp
ale_p2 = fi_ale_hr_gs20$plot( features = df_wavelengths_from_hr$class, ncol = 2 ) & scale_y_continuous( n.breaks = 2 ) & labs(y = "ALE") & theme_pubr(base_size = 14) & theme( legend.text = element_text(size = 13), legend.title = element_text(size = 13), axis.title.y = element_text(size = 8), axis.text.y = element_text(size = 9) ) # Custom y axis breaks for subplots ale_p2[[1]] = ale_p2[[1]] + scale_y_continuous( n.breaks = 4 ) ale_p2[[2]] = ale_p2[[2]] + scale_y_continuous( n.breaks = 5 ) ale_p2[[3]] = ale_p2[[3]] + scale_y_continuous( n.breaks = 4 ) ale_p2[[4]] = ale_p2[[4]] + scale_y_continuous( n.breaks = 4 ) ale_p2[[6]] = ale_p2[[6]] + scale_y_continuous( n.breaks = 4 ) ale_p2[[7]] = ale_p2[[7]] + scale_y_continuous( n.breaks = 4 ) ale_p2[[10]] = ale_p2[[10]] + scale_y_continuous( n.breaks = 3 ) ale_p2
Top ten VI features from permutation Vimp
ale_p3 = fi_ale_vi$plot( features = df_wavelengths_from_indices_label$class, ncol = 2 ) & scale_y_continuous( n.breaks = 2 ) & labs(y = "ALE") & theme_pubr(base_size = 14) & theme( legend.text = element_text(size = 13), legend.title = element_text(size = 13), axis.title.y = element_text(size = 8), axis.text.y = element_text(size = 9) ) # Custom y axis breaks for subplots ale_p3[[1]] = ale_p3[[1]] + scale_y_continuous( n.breaks = 4 ) ale_p3[[2]] = ale_p3[[2]] + scale_y_continuous( n.breaks = 4 ) ale_p3[[3]] = ale_p3[[3]] + scale_y_continuous( n.breaks = 4 ) ale_p3[[4]] = ale_p3[[4]] + scale_y_continuous( n.breaks = 4 ) ale_p3
Top ten HR features from permutation Vimp
ale_p4 = fi_ale_vi_gs20$plot( features = df_wavelengths_from_indices_label$class, ncol = 2 ) & scale_y_continuous( n.breaks = 2 ) & labs(y = "ALE") & theme_pubr(base_size = 14) & theme( legend.text = element_text(size = 13), legend.title = element_text(size = 13), axis.title.y = element_text(size = 8), axis.text.y = element_text(size = 9) ) # Custom y axis breaks for subplots ale_p4[[2]] = ale_p3[[2]] + scale_y_continuous( n.breaks = 4 ) ale_p4[[3]] = ale_p3[[3]] + scale_y_continuous( n.breaks = 4 ) ale_p4[[4]] = ale_p3[[4]] + scale_y_continuous( n.breaks = 4 ) ale_p4
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.