README.md

Using the agroPack package to generate the results

Introduction

agroPack contains the data and the functions used to analyse the relationship between soil biodiversity and windbreaks in arable lands of southern Quebec

Back to top

loading the packages

Throughout the code I use agroPack. Which can be installed with devtools::install_github("maximerivest/agroPack"). I also use dplyr and ggplot2, both available on CRAN.

library(agroPack)
library(ggplot2)
library(dplyr)

Back to top

Give path to my document

I use two computers with exactly the some content in "Documents" but one is hosted on a different hardrive. Depending where I run this script I comment in/out either of the paths.

#cur_dir <- "/media/bigdrive/document_FedoraFall2018/"
cur_dir <- "~/Documents/"

Back to top

From sequencing data to richness

I inserted data into the package. I tend to load raw data into the package and do the transformation here. So the following lines of codes transforms the OTU data.table into species diversity metric. I start from a phyloseq data structure, use phyloseq for some filtering and transformation and then I extract the otu_table and match rownames. Finally, this data structure can be feed into a vegan function.

bako <- baktps
fungo <- fungips
#---- Rarefy to even depth and Compute shannon diversity -----------------------
set.seed(100)
fungo = phyloseq::filter_taxa(fungo, function(x) sum(x) > 1, TRUE)
fungo = phyloseq::rarefy_even_depth(fungo, sample.size = 2200)
## You set `rngseed` to FALSE. Make sure you've set & recorded
##  the random seed of your session for reproducibility.
## See `?set.seed`

## ...

## 5 samples removedbecause they contained fewer reads than `sample.size`.

## Up to first five removed samples are:

## 36288289290291

## ...

## 61OTUs were removed because they are no longer 
## present in any sample after random subsampling

## ...
bako = phyloseq::filter_taxa(bako, function(x) sum(x) > 1, TRUE)
bako = phyloseq::rarefy_even_depth(bako, sample.size = 9500)
## You set `rngseed` to FALSE. Make sure you've set & recorded
##  the random seed of your session for reproducibility.
## See `?set.seed`
## 
## ...

## 4 samples removedbecause they contained fewer reads than `sample.size`.

## Up to first five removed samples are:

## 289290291348

## ...

## 15OTUs were removed because they are no longer 
## present in any sample after random subsampling

## ...
shannon_bakt <- vegan::diversity(phyloseq::otu_table(bako),'shannon')
shannon_fung <- vegan::diversity(phyloseq::otu_table(fungo),'shannon')

# Bacteria
Sample_ID <- paste0(stringr::str_extract(phyloseq::sample_data(bako)$Site, "[1-2]"), "-",
                    toupper(stringr::str_extract(phyloseq::sample_data(bako)$Site, "[a-z]")),
                    phyloseq::sample_data(bako)$Plot, "-",
                    phyloseq::sample_data(bako)$Trtment)

sha_bako <- data.frame(row.names = phyloseq::sample_data(bako)$sample_id,
                       Sample_ID,
                       shannon_bakt)
# Fungi
Sample_ID <- paste0(stringr::str_extract(phyloseq::sample_data(fungo)$Site, "[1-2]"), "-",
                    toupper(stringr::str_extract(phyloseq::sample_data(fungo)$Site, "[a-z]")),
                    phyloseq::sample_data(fungo)$Plot, "-",
                    phyloseq::sample_data(fungo)$Trtment)
sha_fung <- data.frame(row.names = phyloseq::sample_data(fungo)$sample_id,
                       Sample_ID,
                       shannon_fung)
dplyr::left_join(main_data, sha_bako) %>%
  dplyr::left_join(sha_fung) -> main_data
## Joining, by = "Sample_ID"

## Warning: Column `Sample_ID` joining factors with different levels, coercing
## to character vector

## Joining, by = "Sample_ID"

## Warning: Column `Sample_ID` joining character vector and factor, coercing
## into character vector

Back to top

From earthworm species occurence table to diversity metrics

Same type of things as for bacteria and fungi but with a bit less filtering and without using phyloseq because earthworms data are based on actuall individual sampling rather than sequencing data.

# Earthworms
names(earthworm_community)[1] <- "Sample_ID"
mat_ea <- as.matrix(earthworm_community[,2:9])
row.names(mat_ea) <- earthworm_community[,1]
shannon_worm <- vegan::diversity(mat_ea,'shannon')
sha_worm <- data.frame(row.names = earthworm_community$Sample_ID,
                       Sample_ID = earthworm_community$Sample_ID,
                       shannon_worm)
main_data <- dplyr::left_join(main_data, sha_worm)
## Joining, by = "Sample_ID"

## Warning: Column `Sample_ID` joining character vector and factor, coercing
## into character vector

Preparation of the list of variable

Here, I will be preparing a list with all the data and parameters that I use for the statistical analysis. I am making very similar plots and statistics as I related several dependent variables to one independent variable, namely "distance from the windbreaks".

ls_dependent_variables_to_edge_distance <- list(
  water_content = list(varname = "Gravimetric humidity",
                       ylab = "Gravimetric humidity",
                       dataset = list(to_plot = main_data$Gravimetric_humidity,
                                      for_stat = main_data$Gravimetric_humidity)
  ),
  whc = list(varname = "Water holding capacity",
             ylab = "Water holding capacity",
             dataset = list(to_plot = main_data$Whc,
                            for_stat = main_data$Whc)
  ),
  total_c = list(varname = "Organic C (g/kg)",
                 ylab = expression(paste("Organic C (g ",kg^{-1},")")),
                 dataset = list(to_plot = main_data$Ctot,
                                for_stat = main_data$Ctot)
  ),
  total_n = list(varname = "Total nitrogen (g/kg)",
                 ylab = expression(paste("Total nitrogen (g ",kg^{-1},")")),
                 dataset = list(to_plot = main_data$Ntot,
                                for_stat = main_data$Ntot)
  ),
  c_n_ratio = list(varname = "C:N",
                   ylab = "C:N",
                 dataset = list(to_plot = main_data$C_N,
                                for_stat = main_data$C_N)
  ),
  pH = list(varname = "pH",
            ylab = "pH",
            dataset = list(to_plot = main_data$pH,
                           for_stat = main_data$pH)
  ),
  nem = list(varname = "log(Nematode density (no./kg))",
             ylab = expression(paste("Nematode density (no. ",kg^{-1},")")),
             dataset = list(to_plot = main_data$Nematode_count * 20,
                            for_stat = log((main_data$Nematode_count * 20)+1))
  ),
  ea_count = list(varname = "log(Earthworm density (no./m2))",
                  ylab = expression(paste("Earthworm density (no. ",m^{2},")")),
                  dataset = list(to_plot = main_data$Earthworm_count/0.09,
                                 for_stat = log((main_data$Earthworm_count/0.09)+1))
  ),
  ea_div = list(varname = "log(Earthworm species richness)",
                ylab = "Earthworm species richness",
                dataset = list(to_plot = main_data$Earthworm_div,
                               for_stat = log(main_data$Earthworm_div + 1))
  ),
  ea_biomass = list(varname = "log(Earthworm biomass (g/m2))",
                    ylab = expression(paste("Earthworm biomass g", m^{2}, ")")),
                    dataset = list(to_plot = main_data$Earthworm_biomass/0.09,
                                   for_stat = log((main_data$Earthworm_biomass/0.09)+1))
  ),
  bakt_div = list(varname = "Bacteria Shannon exponential",
                  ylab = expression(paste(e^{H*minute}," Bacteria")),
                  dataset = list(to_plot = main_data$shannon_bakt,
                                 for_stat = main_data$shannon_bakt)
  ),
  fung_div = list(varname = "Fungi Shannon exponential",
                  ylab = expression(paste(e^{H*minute}," fungi")),
                  dataset = list(to_plot = main_data$shannon_fung,
                                 for_stat = main_data$shannon_fung)
  )
)

Back to top

Statistical analysis

Here, I use the function that I created to make mix effect linear regression, mixed effect Anova, and multiple comparaison of mix effect linear regression (with factors). Within this code chunk, I also make tables of results that I export to csv.

for(i in 1:length(ls_dependent_variables_to_edge_distance)){
  ls_dependent_variables_to_edge_distance[[i]]$stats <- 
    stats_for_soil_properties(ls_dependent_variables_to_edge_distance[[i]]$dataset$for_stat)
}

table_anova_mixed_effect <- function(my_list, varname = ""){
  res <- summary(aov(y ~ Treatment + Error(Field_block/Treatment), data=my_list$df))[[2]][[1]]
  class(res) <- "data.frame"
  res <- res[1,]
  res$variable_name <- varname
  return(res)
}

list_table_anova <- list(1:length(ls_dependent_variables_to_edge_distance))
for(i in 1:length(ls_dependent_variables_to_edge_distance)){
  list_table_anova[[i]] <- table_anova_mixed_effect(ls_dependent_variables_to_edge_distance[[i]]$stats, 
                                                  deparse(ls_dependent_variables_to_edge_distance[[i]]$varname))
}

all_anova_mixed <- bind_rows(list_table_anova)

all_anova_mixed %>%
  mutate(
    "Benjamini-Hochberg corrected p-value" = format(`Pr(>F)`/dplyr::percent_rank(`Pr(>F)`), scientific=F),
    "p-value" = format(`Pr(>F)`, scientific=F)
  ) -> all_anova_mixed
write.csv(all_anova_mixed, paste0(cur_dir, "/Msc/manuscript_nem_vdt_david_jan29/data/anova_distance_effect.csv"))


list_table_multcomp <- list(1:length(ls_dependent_variables_to_edge_distance))
for(i in 1:length(ls_dependent_variables_to_edge_distance)){
  list_table_multcomp[[i]] <- table_of_multiple_comparaison(ls_dependent_variables_to_edge_distance[[i]]$stats, 
                                                    deparse(ls_dependent_variables_to_edge_distance[[i]]$varname))
}

all_comps <- bind_rows(list_table_multcomp)
## Warning in bind_rows_(x, .id): Unequal factor levels: coercing to character

## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector

## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector

## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector

## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector

## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector

## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector

## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector

## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector

## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector

## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector

## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector

## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
all_comps %>%
  mutate(
    "Benjamini-Hochberg corrected p-value" = format(`p-value`/dplyr::percent_rank(`p-value`), scientific=F),
    "p-value" = format(`p-value`, scientific=F)
  ) -> all_comps

write.csv(all_comps, paste0(cur_dir, "/Msc/manuscript_nem_vdt_david_jan29/data/multiple_comparaisons.csv"))

Back to top

Plots

Here, I use the function that I created to make boxplots of the dependent variable as a function of windbreak distance. Within this code chunk, I also save the plots as png.

for(i in 1:length(ls_dependent_variables_to_edge_distance)){
  ls_dependent_variables_to_edge_distance[[i]]$boxplot_dist <- 
    plot_for_soil_properties(ls_dependent_variables_to_edge_distance[[i]]$stats, 
                             my_ylab = ls_dependent_variables_to_edge_distance[[i]]$ylab)
}

for(i in 1:length(ls_dependent_variables_to_edge_distance)){
  png(paste0(cur_dir,
             'Msc/manuscript_nem_vdt_david_jan29/figure/',
             stringr::str_replace(ls_dependent_variables_to_edge_distance[[i]]$varname,"/", " "),
             '.png'),
      width = 8.4, height = 8.4, units = 'cm', res = 300)
  print(ls_dependent_variables_to_edge_distance[[i]]$boxplot_dist)
  dev.off()

  setEPS()
  postscript(paste0(cur_dir,
                    'Msc/manuscript_nem_vdt_david_jan29/figure/',
                    stringr::str_replace(ls_dependent_variables_to_edge_distance[[i]]$varname,"/", " "),
                    '.png'), width = 3.30, height = 3.30)
  print(ls_dependent_variables_to_edge_distance[[i]]$boxplot_dist)
  dev.off()
}


# # Combine plots
# png(paste0(cur_dir,'Msc/manuscript_nem_vdt_david_jan29/figure/soil_combined.png'), width = 8.4, height = 8.4, units = 'cm', res = 300)
# gridExtra::grid.arrange(p_water_content,
#                         p_whc,
#                         p_c,
#                         p_n,
#                         p_c_n,
#                         p_ph, ncol = 3)
# dev.off()
# 
# setEPS()
# postscript(paste0(cur_dir,'Msc/manuscript_nem_vdt_david_jan29/figure/soil_combined.eps'), width = 3.3, height = 3.3)
# gridExtra::grid.arrange(p_water_content,
#                         p_whc,
#                         p_c,
#                         p_n,
#                         p_c_n,
#                         p_ph, ncol = 3)
# dev.off()

Back to top



MaximeRivest/agroPack documentation built on May 3, 2019, 3:33 p.m.