Introduction

agroPack contains the data and the functions used to analyse the relationship between soil biodiversity and windbreaks in arable lands of southern Quebec

Back to top

loading the packages

Throughout the code I use agroPack. Which can be installed with devtools::install_github("maximerivest/agroPack"). I also use dplyr and ggplot2, both available on CRAN.

library(agroPack)
library(ggplot2)
library(dplyr)
library(agroPack)
library(ggplot2)
library(dplyr)

Back to top

Give path to my document

I use two computers with exactly the some content in "Documents" but one is hosted on a different hardrive. Depending where I run this script I comment in/out either of the paths.

#cur_dir <- "/media/bigdrive/document_FedoraFall2018/"
cur_dir <- "~/Documents/"

Back to top

From sequencing data to richness

I inserted data into the package. I tend to load raw data into the package and do the transformation here. So the following lines of codes transforms the OTU data.table into species diversity metric. I start from a phyloseq data structure, use phyloseq for some filtering and transformation and then I extract the otu_table and match rownames. Finally, this data structure can be feed into a vegan function.

bako <- baktps
fungo <- fungips
#---- Rarefy to even depth and Compute shannon diversity -----------------------
set.seed(100)
fungo = phyloseq::filter_taxa(fungo, function(x) sum(x) > 1, TRUE)
fungo = phyloseq::rarefy_even_depth(fungo, sample.size = 2200)
bako = phyloseq::filter_taxa(bako, function(x) sum(x) > 1, TRUE)
bako = phyloseq::rarefy_even_depth(bako, sample.size = 9500)

shannon_bakt <- vegan::diversity(phyloseq::otu_table(bako),'shannon')
shannon_fung <- vegan::diversity(phyloseq::otu_table(fungo),'shannon')

# Bacteria
Sample_ID <- paste0(stringr::str_extract(phyloseq::sample_data(bako)$Site, "[1-2]"), "-",
                    toupper(stringr::str_extract(phyloseq::sample_data(bako)$Site, "[a-z]")),
                    phyloseq::sample_data(bako)$Plot, "-",
                    phyloseq::sample_data(bako)$Trtment)

sha_bako <- data.frame(row.names = phyloseq::sample_data(bako)$sample_id,
                       Sample_ID,
                       shannon_bakt)
# Fungi
Sample_ID <- paste0(stringr::str_extract(phyloseq::sample_data(fungo)$Site, "[1-2]"), "-",
                    toupper(stringr::str_extract(phyloseq::sample_data(fungo)$Site, "[a-z]")),
                    phyloseq::sample_data(fungo)$Plot, "-",
                    phyloseq::sample_data(fungo)$Trtment)
sha_fung <- data.frame(row.names = phyloseq::sample_data(fungo)$sample_id,
                       Sample_ID,
                       shannon_fung)
dplyr::left_join(main_data, sha_bako) %>%
  dplyr::left_join(sha_fung) -> main_data

Back to top

From earthworm species occurence table to diversity metrics

Same type of things as for bacteria and fungi but with a bit less filtering and without using phyloseq because earthworms data are based on actuall individual sampling rather than sequencing data.

# Earthworms
names(earthworm_community)[1] <- "Sample_ID"
mat_ea <- as.matrix(earthworm_community[,2:9])
row.names(mat_ea) <- earthworm_community[,1]
shannon_worm <- vegan::diversity(mat_ea,'shannon')
sha_worm <- data.frame(row.names = earthworm_community$Sample_ID,
                       Sample_ID = earthworm_community$Sample_ID,
                       shannon_worm)
main_data <- dplyr::left_join(main_data, sha_worm)

Preparation of the list of variable

Here, I will be preparing a list with all the data and parameters that I use for the statistical analysis. I am making very similar plots and statistics as I related several dependent variables to one independent variable, namely "distance from the windbreaks".

ls_dependent_variables_to_edge_distance <- list(
  water_content = list(varname = "Gravimetric humidity",
                       ylab = "Gravimetric humidity",
                       dataset = list(to_plot = main_data$Gravimetric_humidity,
                                      for_stat = main_data$Gravimetric_humidity)
  ),
  whc = list(varname = "Water holding capacity",
             ylab = "Water holding capacity",
             dataset = list(to_plot = main_data$Whc,
                            for_stat = main_data$Whc)
  ),
  total_c = list(varname = "Organic C (g/kg)",
                 ylab = expression(paste("Organic C (g ",kg^{-1},")")),
                 dataset = list(to_plot = main_data$Ctot,
                                for_stat = main_data$Ctot)
  ),
  total_n = list(varname = "Total nitrogen (g/kg)",
                 ylab = expression(paste("Total nitrogen (g ",kg^{-1},")")),
                 dataset = list(to_plot = main_data$Ntot,
                                for_stat = main_data$Ntot)
  ),
  c_n_ratio = list(varname = "C:N",
                   ylab = "C:N",
                 dataset = list(to_plot = main_data$C_N,
                                for_stat = main_data$C_N)
  ),
  pH = list(varname = "pH",
            ylab = "pH",
            dataset = list(to_plot = main_data$pH,
                           for_stat = main_data$pH)
  ),
  nem = list(varname = "log(Nematode density (no./kg))",
             ylab = expression(paste("Nematode density (no. ",kg^{-1},")")),
             dataset = list(to_plot = main_data$Nematode_count * 20,
                            for_stat = log((main_data$Nematode_count * 20)+1))
  ),
  ea_count = list(varname = "log(Earthworm density (no./m2))",
                  ylab = expression(paste("Earthworm density (no. ",m^{2},")")),
                  dataset = list(to_plot = main_data$Earthworm_count/0.09,
                                 for_stat = log((main_data$Earthworm_count/0.09)+1))
  ),
  ea_div = list(varname = "log(Earthworm species richness)",
                ylab = "Earthworm species richness",
                dataset = list(to_plot = main_data$Earthworm_div,
                               for_stat = log(main_data$Earthworm_div + 1))
  ),
  ea_biomass = list(varname = "log(Earthworm biomass (g/m2))",
                    ylab = expression(paste("Earthworm biomass g", m^{2}, ")")),
                    dataset = list(to_plot = main_data$Earthworm_biomass/0.09,
                                   for_stat = log((main_data$Earthworm_biomass/0.09)+1))
  ),
  bakt_div = list(varname = "Bacteria Shannon exponential",
                  ylab = expression(paste(e^{H*minute}," Bacteria")),
                  dataset = list(to_plot = main_data$shannon_bakt,
                                 for_stat = main_data$shannon_bakt)
  ),
  fung_div = list(varname = "Fungi Shannon exponential",
                  ylab = expression(paste(e^{H*minute}," fungi")),
                  dataset = list(to_plot = main_data$shannon_fung,
                                 for_stat = main_data$shannon_fung)
  )
)

Back to top

Statistical analysis

Here, I use the function that I created to make mix effect linear regression, mixed effect Anova, and multiple comparaison of mix effect linear regression (with factors). Within this code chunk, I also make tables of results that I export to csv.

for(i in 1:length(ls_dependent_variables_to_edge_distance)){
  ls_dependent_variables_to_edge_distance[[i]]$stats <- 
    stats_for_soil_properties(ls_dependent_variables_to_edge_distance[[i]]$dataset$for_stat)
}

table_anova_mixed_effect <- function(my_list, varname = ""){
  res <- summary(aov(y ~ Treatment + Error(Field_block/Treatment), data=my_list$df))[[2]][[1]]
  class(res) <- "data.frame"
  res <- res[1,]
  res$variable_name <- varname
  return(res)
}

list_table_anova <- list(1:length(ls_dependent_variables_to_edge_distance))
for(i in 1:length(ls_dependent_variables_to_edge_distance)){
  list_table_anova[[i]] <- table_anova_mixed_effect(ls_dependent_variables_to_edge_distance[[i]]$stats, 
                                                  deparse(ls_dependent_variables_to_edge_distance[[i]]$varname))
}

all_anova_mixed <- bind_rows(list_table_anova)

all_anova_mixed %>%
  mutate(
    "Benjamini-Hochberg corrected p-value" = format(`Pr(>F)`/dplyr::percent_rank(`Pr(>F)`), scientific=F),
    "p-value" = format(`Pr(>F)`, scientific=F)
  ) -> all_anova_mixed
write.csv(all_anova_mixed, paste0(cur_dir, "/Msc/manuscript_nem_vdt_david_jan29/data/anova_distance_effect.csv"))


list_table_multcomp <- list(1:length(ls_dependent_variables_to_edge_distance))
for(i in 1:length(ls_dependent_variables_to_edge_distance)){
  list_table_multcomp[[i]] <- table_of_multiple_comparaison(ls_dependent_variables_to_edge_distance[[i]]$stats, 
                                                    deparse(ls_dependent_variables_to_edge_distance[[i]]$varname))
}

all_comps <- bind_rows(list_table_multcomp)

all_comps %>%
  mutate(
    "Benjamini-Hochberg corrected p-value" = format(`p-value`/dplyr::percent_rank(`p-value`), scientific=F),
    "p-value" = format(`p-value`, scientific=F)
  ) -> all_comps

write.csv(all_comps, paste0(cur_dir, "/Msc/manuscript_nem_vdt_david_jan29/data/multiple_comparaisons.csv"))

Back to top

Plots

Here, I use the function that I created to make boxplots of the dependent variable as a function of windbreak distance. Within this code chunk, I also save the plots as png.

for(i in 1:length(ls_dependent_variables_to_edge_distance)){
  ls_dependent_variables_to_edge_distance[[i]]$boxplot_dist <- 
    plot_for_soil_properties(ls_dependent_variables_to_edge_distance[[i]]$stats, 
                             my_ylab = ls_dependent_variables_to_edge_distance[[i]]$ylab)
}

for(i in 1:length(ls_dependent_variables_to_edge_distance)){
  png(paste0(cur_dir,
             'Msc/manuscript_nem_vdt_david_jan29/figure/',
             stringr::str_replace(ls_dependent_variables_to_edge_distance[[i]]$varname,"/", " "),
             '.png'),
      width = 8.4, height = 8.4, units = 'cm', res = 300)
  print(ls_dependent_variables_to_edge_distance[[i]]$boxplot_dist)
  dev.off()

  setEPS()
  postscript(paste0(cur_dir,
                    'Msc/manuscript_nem_vdt_david_jan29/figure/',
                    stringr::str_replace(ls_dependent_variables_to_edge_distance[[i]]$varname,"/", " "),
                    '.png'), width = 3.30, height = 3.30)
  print(ls_dependent_variables_to_edge_distance[[i]]$boxplot_dist)
  dev.off()
}


# # Combine plots
# png(paste0(cur_dir,'Msc/manuscript_nem_vdt_david_jan29/figure/soil_combined.png'), width = 8.4, height = 8.4, units = 'cm', res = 300)
# gridExtra::grid.arrange(p_water_content,
#                         p_whc,
#                         p_c,
#                         p_n,
#                         p_c_n,
#                         p_ph, ncol = 3)
# dev.off()
# 
# setEPS()
# postscript(paste0(cur_dir,'Msc/manuscript_nem_vdt_david_jan29/figure/soil_combined.eps'), width = 3.3, height = 3.3)
# gridExtra::grid.arrange(p_water_content,
#                         p_whc,
#                         p_c,
#                         p_n,
#                         p_c_n,
#                         p_ph, ncol = 3)
# dev.off()

Back to top



MaximeRivest/agroPack documentation built on May 3, 2019, 3:33 p.m.