knitr::opts_chunk$set( fig.retina = 3, fig.align = "center", fig.width = 6.93, fig.height = 6.13, out.width = "70%", echo = FALSE ) library("drake") library("magrittr") R.utils::sourceDirectory("R") R.utils::sourceDirectory("code") # load drake objects loadd( vi_data, nri_data, bands_data, coords_vi_nri_clean, filter_values, filter_info_gain_nbins, trees_with_bands, trees_with_bands_no_buffer ) library("DataExplorer") library("dplyr") library("ggsci") library("ggpubr") library("ggplot2") library("knitr") library("purrr") library("sp") library("raster") library("fs") library("patchwork") library("here")
intro <- introduce(vi_data) intro_df <- data.frame( "Name" = c( "Rows", "Columns", "Discrete columns", "Continuous columns", "All missing columns", "Missing observations", "Complete Rows", "Total observations" ), "Value" = c( format(intro[["rows"]], big.mark = ","), format(intro[["columns"]], big.mark = ","), format(intro[["discrete_columns"]], big.mark = ","), format(intro[["continuous_columns"]], big.mark = ","), format(intro[["all_missing_columns"]], big.mark = ","), format(intro[["total_missing_values"]], big.mark = ","), format(intro[["complete_rows"]], big.mark = ","), format(intro[["total_observations"]], big.mark = ",") ) ) knitr::kable(intro_df)
plot_histogram(vi_data)
# remove response vi_data_no_defol_no_response <- vi_data vi_data_no_defol_no_response$defoliation <- NULL pca_vi <- DataExplorer::plot_prcomp(vi_data_no_defol_no_response, variance_cap = 0.95, ggtheme = ggpubr::theme_pubclean(), geom_label_args = list(size = 2.5, nudge_y = 0.03), parallel = TRUE ) pca_vi <- pca_vi[[1]] + labs(title = "Target POV: 95%") pca_vi[[1]]
plot_correlation(vi_data)
intro <- introduce(nri_data) intro_df <- data.frame( "Name" = c( "Rows", "Columns", "Discrete columns", "Continuous columns", "All missing columns", "Missing observations", "Complete Rows", "Total observations" ), "Value" = c( format(intro[["rows"]], big.mark = ","), format(intro[["columns"]], big.mark = ","), format(intro[["discrete_columns"]], big.mark = ","), format(intro[["continuous_columns"]], big.mark = ","), format(intro[["all_missing_columns"]], big.mark = ","), format(intro[["total_missing_values"]], big.mark = ","), format(intro[["complete_rows"]], big.mark = ","), format(intro[["total_observations"]], big.mark = ",") ) ) kable(intro_df)
No histograms for NRI -> too many features.
# remove response nri_data_no_response <- nri_data nri_data_no_response$defoliation <- NULL pca_nri <- DataExplorer::plot_prcomp(nri_data_no_response, variance_cap = 0.95, ggtheme = ggpubr::theme_pubclean(), geom_label_args = list(size = 2.5, nudge_y = 0.03), parallel = TRUE ) pca_nri <- pca_nri[[1]] + labs(title = "Target POV: 95%") pca_nri
No correlation plot for NRI -> too many features.
intro <- introduce(bands_data) intro_df <- data.frame( "Name" = c( "Rows", "Columns", "Discrete columns", "Continuous columns", "All missing columns", "Missing observations", "Complete Rows", "Total observations" ), "Value" = c( format(intro[["rows"]], big.mark = ","), format(intro[["columns"]], big.mark = ","), format(intro[["discrete_columns"]], big.mark = ","), format(intro[["continuous_columns"]], big.mark = ","), format(intro[["all_missing_columns"]], big.mark = ","), format(intro[["total_missing_values"]], big.mark = ","), format(intro[["complete_rows"]], big.mark = ","), format(intro[["total_observations"]], big.mark = ",") ) ) kable(intro_df)
plot_histogram(bands_data)
# remove response bands_data_no_response <- bands_data bands_data_no_response$defoliation <- NULL pca_hr <- DataExplorer::plot_prcomp(bands_data_no_response, variance_cap = 0.98, geom_label_args = list(size = 2.5, nudge_y = 0.03), ggtheme = ggpubr::theme_pubclean() ) pca_hr <- pca_hr[[1]] + labs(title = "Target POV: 95%") pca_hr
plot_correlation(bands_data)
vi_data_plot <- vi_data %>% mutate(plot = factor(rep( c("Laukiz 1", "Laukiz 2", "Luiando", "Oiartzun"), c(559, 451, 301, 497) )))
mean_defol <- vi_data_plot %>% group_by(plot) %>% summarise(mean(defoliation)) %>% pull(.) vi_data_plot %>% group_by(plot) %>% summarise(mean(defoliation))
cov_defol <- vi_data_plot %>% group_by(plot) %>% summarise((sd(defoliation) / mean(defoliation)) * 100) %>% pull(.) vi_data_plot %>% group_by(plot) %>% summarise((sd(defoliation) / mean(defoliation)) * 100)
sd_skewness_defol <- vi_data_plot %>% group_by(plot) %>% summarise(((sd(defoliation) / mean(defoliation)) * 100) / e1071::skewness(defoliation)) %>% pull(.) vi_data_plot %>% group_by(plot) %>% summarise(((sd(defoliation) / mean(defoliation)) * 100) / e1071::skewness(defoliation))
sd_defol <- vi_data_plot %>% group_by(plot) %>% summarise(sd(defoliation)) %>% pull(.) vi_data_plot %>% group_by(plot) %>% summarise(sd(defoliation))
boxplot_defol <- vi_data_plot %>% group_by(plot) %>% ggboxplot( x = "plot", y = "defoliation", color = "plot", add = "jitter", add.params = list(size = "defoliation") ) + scale_size(range = c(0.5, 0.5)) + annotate("text", label = expression(bold(atop("n = 559", bar(x) ~ "= 57.23"))), x = 1, y = 112, size = 4, colour = "#BC3C29", fontface = 2 ) + annotate("text", label = expression(bold(atop("n = 451", bar(x) ~ "= 13.54"))), x = 2, y = 112, size = 4, colour = "#0072B5", fontface = 2 ) + annotate("text", label = expression(bold(atop("n = 301", bar(x) ~ "= 68.36"))), x = 3, y = 112, size = 4, colour = "#E18727", fontface = 2 ) + annotate("text", label = expression(bold(atop("n = 497", bar(x) ~ "= 69.07"))), x = 4, y = 112, size = 4, colour = "#20854E", fontface = 2 ) + scale_color_nejm() + ggpubr::theme_pubr(base_size = 14) + theme(legend.position = "none") + labs(y = "Total defoliation per tree (%)", x = "Plot") boxplot_defol
Mean average distance between all trees in a given plot.
Calculated via raster::pointDistance(allpairs = TRUE)
.
plots <- list("Laukiz 1", "Laukiz 2", "Luiando", "Oiartzun") dist <- map(plots, ~ { coords <- coords_vi_nri_clean %>% mutate(plot = factor(rep( c("Laukiz 1", "Laukiz 2", "Luiando", "Oiartzun"), c(559, 451, 301, 497) ))) %>% filter(plot == .x) %>% dplyr::select(-plot) points <- SpatialPoints( coords = coords, # EPSG: 25830 proj4string = CRS("+proj=utm +zone=30 +ellps=WGS84 +datum=WGS84 +units=m +no_defs") ) distance <- mean(as.dist(pointDistance(points, allpairs = TRUE, lonlat = FALSE ))) }) set_names(dist, plots) dist_plots <- unlist(dist)
# file_move("docs/figure/eda.Rmd/defoliation-distribution-plot-1.pdf", # "code/98-paper/journal/") # file_move("docs/figure/eda.Rmd/pca-bands-1.pdf", # "code/98-paper/journal/")
Comparing the "proportion of variance explained" (POV) for all feature sets.
pca_vi / pca_nri / pca_hr * theme( plot.title = element_text(size = 10), axis.title.y = element_text(size = 8), axis.title.x = element_text(size = 10) ) + patchwork::plot_annotation( tag_levels = list(c("VI", "NRI", "HR")), title = "Proportion of variance explained by principal components", subtitle = "Labels indicate cumulative % of explained variance" )
Standard scenario (somewhere within the pixel)
knitr::include_graphics(here("docs/figure/tree-buffer-vis.png"))
Extreme scenario (at the border of one or multiple pixels)
knitr::include_graphics(here("docs/figure/tree-buffer-vis-max.png"))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.