knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
When validating a statistical methodology it is often useful to test the method using simulated data. Here we test whether a method can correctly identify no pattern when the data is random, and similarly detect the correct pattern when there is a know n structure. This is another way to test whether given the amount and temporal resolution of the data is sufficent to correctly indentify the desired patterns. In both cases below we show the dificulty of dealing with low sample sizes in making inferences.
#if netTS is not yet installed you can use the following code to install it from github: #devtools::install_github("tbonne/netTS") #https://github.com/r-lib/devtools/issues/1667 ignore build vignette library(ggplot2) library(netTS) library(igraph) library(reshape2) library(lubridate) library(mgcv) #library(lineprof) #t<-lineprof(sim.events.data(nodes=20, sampling.periods = 2000,sampling.periods.per.day = 20)) #t #shine(t)
Mean Betweenness
#1. use mean strength of the network mean_betweenness <- function (net) { md <- mean(betweenness(net)) return(md) }
Simulate data: random
#simulate some data (too long to run) df.rand <-sim.events.data(nodes=30, sampling.periods = 100, sampling.periods.per.day = 2) head(df.rand)
Get network measures overtime and test with permutations: 30 days
#1. Extract network strength over time str.30 <- graphTS(df.rand,windowsize = days(30), windowshift = days(1), measureFun = mean_betweenness, effortFun = effort.events, nperm=100) #2. Plot the results ggplot(str.30, aes(x=windowstart, y=measure))+geom_point()+geom_line()+ geom_ribbon(data=str.30, aes(ymin=CI.low, ymax=CI.high), fill="red", alpha=0.2)+ labs(y="Mean strength",x="Date")+theme_classic()
Run statistical model
#convert dates to time from start str.30$time <- as.numeric(as.duration(str.30$windowstart-(min(str.30$windowstart)))) str.30$time <- scale(str.30$time) #fit a generalized additive model to test for a temporal trend fit.gam.30 <- gam(measure ~ s(time), data = str.30) #plot estiamted temporal trend in strength (should be flat!) plot(fit.gam.30)
Simulate data: star structure
#1. create a known underlying network net1 = sample_k_regular(30, k=3, directed = FALSE, multiple = FALSE) E(net1)$weight <- runif(ecount(net1)) true.betweeneess <- mean(betweenness(net1)) plot(net1, edge.width=E(net1)$weight) #2. simulate event data using the 'true' network: net1 df.obs <-sim.events.data(nodes=30, sampling.periods = 100,sampling.periods.per.day = 2, true.net = net1 ) #3. take a look at the data head(df.obs)
Get network measures overtime and test with permutations: 30 days
#1. Extract network strength over time bet.30.true <- graphTS(df.obs,windowsize = days(30), windowshift = days(1), measureFun = mean_betweenness, effortFun = effort.events, nperm=100) #2. Plot the results ggplot(bet.30.true, aes(x=windowstart, y=measure))+geom_point()+geom_line()+ geom_ribbon(data=bet.30.true, aes(ymin=CI.low, ymax=CI.high), fill="red", alpha=0.2)+ labs(y="Mean betweenness",x="Date")+theme_classic() + geom_hline(yintercept = true.betweeneess, linetype="dashed")
Run statistical model
#convert dates to time from start bet.30.true$time <- as.numeric(as.duration(bet.30.true$windowstart-(min(bet.30.true$windowstart)))) bet.30.true$time <- scale(bet.30.true$time) #fit a generalized additive model to test for a temporal trend fit.gam.30.true <- gam(measure ~ s(time), data = bet.30.true) #true mean betweeness estiamted? summary(fit.gam.30.true) #plot estiamted temporal trend in strength (should be flat!) plot(fit.gam.30.true)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.