Analysing the reliability of metrics across different MOTU clustering levels:

This code is for comparing how reliable your conclusions are in inter-network analyses using metabarcoding data.

(Dave Hemprich-Bennett, hemprich.bennett@gmail.com, @hammerheadbat)

Example vignette

library(LOTUS)

library(ggplot2)

data(batnets)

## Indices that are passed to metcalcs below
ind <- c(
  'functional complementarity',
  'web asymmetry',
  'Fisher alpha',
  'mean number of shared partners',
  'niche overlap',
  'nestedness',
  'discrepancy',
  'ISA'
)

## Use the metcalcs function to see how the indices change
m <- metcalcs(
  networks = batnets,
  indices = ind,
  network_level = 'higher'
)

## Plot them: value against clustering, coloured by network, with one
## facet per metric (each facet gets its own y scale)
g <- ggplot(m, aes(x = clustering, y = value, color = network)) +
  geom_point() +
  labs(x = 'clustering') +
  # Linear fit with a confidence band. TRUE is spelled out because T is an
  # ordinary variable that can be reassigned, whereas TRUE is a reserved word.
  geom_smooth(method = lm, se = TRUE) +
  scale_x_continuous(breaks = seq(91, 98, 1)) +
  facet_wrap(~ metric, scales = 'free_y') +
  # Strip the grid and background, keeping only black axis lines
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black")
  )
g

alt text

Plot in a simpler manner using the inbuilt function

# The column-name arguments refer to the columns of m used in the plot above
line_plot(
  input = m,
  network = 'network',
  clustering = 'clustering',
  metric = 'metric',
  value = 'value',
  plotname = 'Batnets example'
)

alt text

Using the randomized ranges function on batnets

# Request the 2.5% and 97.5% quantiles for two indices, returned as a
# data.frame. TRUE is spelled out because T is a reassignable variable.
ran <- randomized_ranges(
  batnets,
  indices = c('connectance', 'web asymmetry'),
  network_level = 'higher',
  out_format = 'data.frame',
  quantiles_to_return = c(0.025, 0.975),
  actual_vals = TRUE
)
head(ran)

alt text

Beginner's example of loading in data from multiple datasets

This assumes that you're working in a directory with two subdirectories, 'SAFE' and 'Texas', each of which contains one subdirectory per clustering level, named after the clustering level used to generate the files inside it. See the screenshot below for an example.

alt text

# Make a vector of any files with 'binary' in their name, searching
# subdirectories too. 'pattern' is a regular expression, not a shell glob, so
# no '*' wildcards are needed: read as a regex, the trailing '*' in '*binary*'
# would make the final 'y' optional.
filenames <- list.files(pattern = 'binary', recursive = TRUE)

# Make subsets of this vector for your two networks
safe_filenames <- grep('SAFE', filenames, value = TRUE)
texas_filenames <- grep('Texas', filenames, value = TRUE)

# Load the files (tab-separated, with a header row) into lists, naming each
# element after the directory that contains the file
SAFE <- lapply(
  safe_filenames, read.table,
  sep = '\t', header = TRUE, stringsAsFactors = FALSE
)
names(SAFE) <- basename(dirname(safe_filenames))

texas <- lapply(
  texas_filenames, read.table,
  sep = '\t', header = TRUE, stringsAsFactors = FALSE
)
names(texas) <- basename(dirname(texas_filenames))

# Make the final named list from these objects
netlist <- list(SAFE = SAFE, Texas = texas)

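The object 'netlist' is now a named list with one element per network, each of which is itself a named list with one element per clustering level. This is the 'net_clust' format used by the metcalcs function.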

Alternatively, to load in only a single network for use in metcalcs (the 'clust_only' format), follow the steps above for that one network and stop before the final step of combining the networks into 'netlist'.



hemprichbennett/lOTUs documentation built on Jan. 22, 2021, 8:54 p.m.