In MaximeRivest/agroPack:

Introduction

agroPack contains the data and the functions used to analyse the relationship between soil biodiversity and windbreaks in arable lands of southern Quebec

loading the packages

Throughout the code I use agroPack. Which can be installed with devtools::install_github("maximerivest/agroPack"). I also use dplyr and ggplot2, both available on CRAN.

library(agroPack)
library(ggplot2)
library(dplyr)

library(agroPack)
library(ggplot2)
library(dplyr)

Give path to my document

I use two computers with exactly the some content in "Documents" but one is hosted on a different hardrive. Depending where I run this script I comment in/out either of the paths.

#cur_dir <- "/media/bigdrive/document_FedoraFall2018/"
cur_dir <- "~/Documents/"

From sequencing data to richness

I inserted data into the package. I tend to load raw data into the package and do the transformation here. So the following lines of codes transforms the OTU data.table into species diversity metric. I start from a phyloseq data structure, use phyloseq for some filtering and transformation and then I extract the otu_table and match rownames. Finally, this data structure can be feed into a vegan function.

bako <- baktps
fungo <- fungips
#---- Rarefy to even depth and Compute shannon diversity -----------------------
set.seed(100)
fungo = phyloseq::filter_taxa(fungo, function(x) sum(x) > 1, TRUE)
fungo = phyloseq::rarefy_even_depth(fungo, sample.size = 2200)
bako = phyloseq::filter_taxa(bako, function(x) sum(x) > 1, TRUE)
bako = phyloseq::rarefy_even_depth(bako, sample.size = 9500)

shannon_bakt <- vegan::diversity(phyloseq::otu_table(bako),'shannon')
shannon_fung <- vegan::diversity(phyloseq::otu_table(fungo),'shannon')

# Bacteria
Sample_ID <- paste0(stringr::str_extract(phyloseq::sample_data(bako)$Site, "[1-2]"), "-",
                    toupper(stringr::str_extract(phyloseq::sample_data(bako)$Site, "[a-z]")),
                    phyloseq::sample_data(bako)$Plot, "-",
                    phyloseq::sample_data(bako)$Trtment)

sha_bako <- data.frame(row.names = phyloseq::sample_data(bako)$sample_id,
                       Sample_ID,
                       shannon_bakt)
# Fungi
Sample_ID <- paste0(stringr::str_extract(phyloseq::sample_data(fungo)$Site, "[1-2]"), "-",
                    toupper(stringr::str_extract(phyloseq::sample_data(fungo)$Site, "[a-z]")),
                    phyloseq::sample_data(fungo)$Plot, "-",
                    phyloseq::sample_data(fungo)$Trtment)
sha_fung <- data.frame(row.names = phyloseq::sample_data(fungo)$sample_id,
                       Sample_ID,
                       shannon_fung)
dplyr::left_join(main_data, sha_bako) %>%
  dplyr::left_join(sha_fung) -> main_data

From earthworm species occurence table to diversity metrics

Same type of things as for bacteria and fungi but with a bit less filtering and without using phyloseq because earthworms data are based on actuall individual sampling rather than sequencing data.

# Earthworms
names(earthworm_community)[1] <- "Sample_ID"
mat_ea <- as.matrix(earthworm_community[,2:9])
row.names(mat_ea) <- earthworm_community[,1]
shannon_worm <- vegan::diversity(mat_ea,'shannon')
sha_worm <- data.frame(row.names = earthworm_community$Sample_ID,
                       Sample_ID = earthworm_community$Sample_ID,
                       shannon_worm)
main_data <- dplyr::left_join(main_data, sha_worm)

Preparation of the list of variable

Here, I will be preparing a list with all the data and parameters that I use for the statistical analysis. I am making very similar plots and statistics as I related several dependent variables to one independent variable, namely "distance from the windbreaks".

ls_dependent_variables_to_edge_distance <- list(
  water_content = list(varname = "Gravimetric humidity",
                       ylab = "Gravimetric humidity",
                       dataset = list(to_plot = main_data$Gravimetric_humidity,
                                      for_stat = main_data$Gravimetric_humidity)
  ),
  whc = list(varname = "Water holding capacity",
             ylab = "Water holding capacity",
             dataset = list(to_plot = main_data$Whc,
                            for_stat = main_data$Whc)
  ),
  total_c = list(varname = "Organic C (g/kg)",
                 ylab = expression(paste("Organic C (g ",kg^{-1},")")),
                 dataset = list(to_plot = main_data$Ctot,
                                for_stat = main_data$Ctot)
  ),
  total_n = list(varname = "Total nitrogen (g/kg)",
                 ylab = expression(paste("Total nitrogen (g ",kg^{-1},")")),
                 dataset = list(to_plot = main_data$Ntot,
                                for_stat = main_data$Ntot)
  ),
  c_n_ratio = list(varname = "C:N",
                   ylab = "C:N",
                 dataset = list(to_plot = main_data$C_N,
                                for_stat = main_data$C_N)
  ),
  pH = list(varname = "pH",
            ylab = "pH",
            dataset = list(to_plot = main_data$pH,
                           for_stat = main_data$pH)
  ),
  nem = list(varname = "log(Nematode density (no./kg))",
             ylab = expression(paste("Nematode density (no. ",kg^{-1},")")),
             dataset = list(to_plot = main_data$Nematode_count * 20,
                            for_stat = log((main_data$Nematode_count * 20)+1))
  ),
  ea_count = list(varname = "log(Earthworm density (no./m2))",
                  ylab = expression(paste("Earthworm density (no. ",m^{2},")")),
                  dataset = list(to_plot = main_data$Earthworm_count/0.09,
                                 for_stat = log((main_data$Earthworm_count/0.09)+1))
  ),
  ea_div = list(varname = "log(Earthworm species richness)",
                ylab = "Earthworm species richness",
                dataset = list(to_plot = main_data$Earthworm_div,
                               for_stat = log(main_data$Earthworm_div + 1))
  ),
  ea_biomass = list(varname = "log(Earthworm biomass (g/m2))",
                    ylab = expression(paste("Earthworm biomass g", m^{2}, ")")),
                    dataset = list(to_plot = main_data$Earthworm_biomass/0.09,
                                   for_stat = log((main_data$Earthworm_biomass/0.09)+1))
  ),
  bakt_div = list(varname = "Bacteria Shannon exponential",
                  ylab = expression(paste(e^{H*minute}," Bacteria")),
                  dataset = list(to_plot = main_data$shannon_bakt,
                                 for_stat = main_data$shannon_bakt)
  ),
  fung_div = list(varname = "Fungi Shannon exponential",
                  ylab = expression(paste(e^{H*minute}," fungi")),
                  dataset = list(to_plot = main_data$shannon_fung,
                                 for_stat = main_data$shannon_fung)
  )
)

Statistical analysis

Here, I use the function that I created to make mix effect linear regression, mixed effect Anova, and multiple comparaison of mix effect linear regression (with factors). Within this code chunk, I also make tables of results that I export to csv.

for(i in 1:length(ls_dependent_variables_to_edge_distance)){
  ls_dependent_variables_to_edge_distance[[i]]$stats <- 
    stats_for_soil_properties(ls_dependent_variables_to_edge_distance[[i]]$dataset$for_stat)
}

table_anova_mixed_effect <- function(my_list, varname = ""){
  res <- summary(aov(y ~ Treatment + Error(Field_block/Treatment), data=my_list$df))[[2]][[1]]
  class(res) <- "data.frame"
  res <- res[1,]
  res$variable_name <- varname
  return(res)
}

list_table_anova <- list(1:length(ls_dependent_variables_to_edge_distance))
for(i in 1:length(ls_dependent_variables_to_edge_distance)){
  list_table_anova[[i]] <- table_anova_mixed_effect(ls_dependent_variables_to_edge_distance[[i]]$stats, 
                                                  deparse(ls_dependent_variables_to_edge_distance[[i]]$varname))
}

all_anova_mixed <- bind_rows(list_table_anova)

all_anova_mixed %>%
  mutate(
    "Benjamini-Hochberg corrected p-value" = format(`Pr(>F)`/dplyr::percent_rank(`Pr(>F)`), scientific=F),
    "p-value" = format(`Pr(>F)`, scientific=F)
  ) -> all_anova_mixed
write.csv(all_anova_mixed, paste0(cur_dir, "/Msc/manuscript_nem_vdt_david_jan29/data/anova_distance_effect.csv"))


list_table_multcomp <- list(1:length(ls_dependent_variables_to_edge_distance))
for(i in 1:length(ls_dependent_variables_to_edge_distance)){
  list_table_multcomp[[i]] <- table_of_multiple_comparaison(ls_dependent_variables_to_edge_distance[[i]]$stats, 
                                                    deparse(ls_dependent_variables_to_edge_distance[[i]]$varname))
}

all_comps <- bind_rows(list_table_multcomp)

all_comps %>%
  mutate(
    "Benjamini-Hochberg corrected p-value" = format(`p-value`/dplyr::percent_rank(`p-value`), scientific=F),
    "p-value" = format(`p-value`, scientific=F)
  ) -> all_comps

write.csv(all_comps, paste0(cur_dir, "/Msc/manuscript_nem_vdt_david_jan29/data/multiple_comparaisons.csv"))

Plots

Here, I use the function that I created to make boxplots of the dependent variable as a function of windbreak distance. Within this code chunk, I also save the plots as png.

for(i in 1:length(ls_dependent_variables_to_edge_distance)){
  ls_dependent_variables_to_edge_distance[[i]]$boxplot_dist <- 
    plot_for_soil_properties(ls_dependent_variables_to_edge_distance[[i]]$stats, 
                             my_ylab = ls_dependent_variables_to_edge_distance[[i]]$ylab)
}

for(i in 1:length(ls_dependent_variables_to_edge_distance)){
  png(paste0(cur_dir,
             'Msc/manuscript_nem_vdt_david_jan29/figure/',
             stringr::str_replace(ls_dependent_variables_to_edge_distance[[i]]$varname,"/", " "),
             '.png'),
      width = 8.4, height = 8.4, units = 'cm', res = 300)
  print(ls_dependent_variables_to_edge_distance[[i]]$boxplot_dist)
  dev.off()

  setEPS()
  postscript(paste0(cur_dir,
                    'Msc/manuscript_nem_vdt_david_jan29/figure/',
                    stringr::str_replace(ls_dependent_variables_to_edge_distance[[i]]$varname,"/", " "),
                    '.png'), width = 3.30, height = 3.30)
  print(ls_dependent_variables_to_edge_distance[[i]]$boxplot_dist)
  dev.off()
}


# # Combine plots
# png(paste0(cur_dir,'Msc/manuscript_nem_vdt_david_jan29/figure/soil_combined.png'), width = 8.4, height = 8.4, units = 'cm', res = 300)
# gridExtra::grid.arrange(p_water_content,
#                         p_whc,
#                         p_c,
#                         p_n,
#                         p_c_n,
#                         p_ph, ncol = 3)
# dev.off()
# 
# setEPS()
# postscript(paste0(cur_dir,'Msc/manuscript_nem_vdt_david_jan29/figure/soil_combined.eps'), width = 3.3, height = 3.3)
# gridExtra::grid.arrange(p_water_content,
#                         p_whc,
#                         p_c,
#                         p_n,
#                         p_c_n,
#                         p_ph, ncol = 3)
# dev.off()