knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

Introduction

When validating a statistical methodology it is often useful to test the method using simulated data. Here we test whether a method can correctly identify no pattern when the data is random, and similarly detect the correct pattern when there is a know n structure. This is another way to test whether given the amount and temporal resolution of the data is sufficent to correctly indentify the desired patterns. In both cases below we show the dificulty of dealing with low sample sizes in making inferences.

Libraries

#if netTS is not yet installed you can use the following code to install it from github:
#devtools::install_github("tbonne/netTS")

#https://github.com/r-lib/devtools/issues/1667   ignore build vignette

library(ggplot2)
library(netTS)
library(igraph)
library(reshape2)
library(lubridate)
library(mgcv)
#library(lineprof)
#t<-lineprof(sim.events.data(nodes=20, sampling.periods = 2000,sampling.periods.per.day = 20))
#t
#shine(t)

Measurement function

Mean Betweenness

#1. use mean strength of the network
mean_betweenness <- function (net) {
  md <- mean(betweenness(net))
    return(md)
}

Testing for structure when there is none

Simulate data: random

#simulate some data (too long to run)
df.rand <-sim.events.data(nodes=30, sampling.periods = 100, sampling.periods.per.day = 2)

head(df.rand)

Get network measures overtime and test with permutations: 30 days

#1. Extract network strength over time
str.30 <- graphTS(df.rand,windowsize = days(30), windowshift = days(1), measureFun = mean_betweenness, effortFun = effort.events, nperm=100)

#2. Plot the results
ggplot(str.30, aes(x=windowstart, y=measure))+geom_point()+geom_line()+
  geom_ribbon(data=str.30, aes(ymin=CI.low, ymax=CI.high), fill="red", alpha=0.2)+
  labs(y="Mean strength",x="Date")+theme_classic()

Run statistical model

#convert dates to time from start
str.30$time <- as.numeric(as.duration(str.30$windowstart-(min(str.30$windowstart))))
str.30$time <- scale(str.30$time)

#fit a generalized additive model to test for a temporal trend
fit.gam.30 <- gam(measure ~ s(time), data = str.30)

#plot estiamted temporal trend in strength (should be flat!)
plot(fit.gam.30)

Testing for structure when there is some

Simulate data: star structure

#1. create a known underlying network
net1 = sample_k_regular(30, k=3, directed = FALSE, multiple = FALSE) 
E(net1)$weight <- runif(ecount(net1))
true.betweeneess <- mean(betweenness(net1))
plot(net1, edge.width=E(net1)$weight)

#2. simulate event data using the 'true' network: net1
df.obs <-sim.events.data(nodes=30, sampling.periods = 100,sampling.periods.per.day = 2, true.net = net1 )

#3. take a look at the data
head(df.obs)

Get network measures overtime and test with permutations: 30 days

#1. Extract network strength over time
bet.30.true <- graphTS(df.obs,windowsize = days(30), windowshift = days(1), measureFun = mean_betweenness, effortFun = effort.events, nperm=100)

#2. Plot the results
ggplot(bet.30.true, aes(x=windowstart, y=measure))+geom_point()+geom_line()+
  geom_ribbon(data=bet.30.true, aes(ymin=CI.low, ymax=CI.high), fill="red", alpha=0.2)+
  labs(y="Mean betweenness",x="Date")+theme_classic() + geom_hline(yintercept = true.betweeneess, linetype="dashed")

Run statistical model

#convert dates to time from start
bet.30.true$time <- as.numeric(as.duration(bet.30.true$windowstart-(min(bet.30.true$windowstart))))
bet.30.true$time <- scale(bet.30.true$time)

#fit a generalized additive model to test for a temporal trend
fit.gam.30.true <- gam(measure ~ s(time), data = bet.30.true)

#true mean betweeness estiamted?
summary(fit.gam.30.true)

#plot estiamted temporal trend in strength (should be flat!)
plot(fit.gam.30.true)


tbonne/netTS documentation built on July 26, 2021, 2:27 a.m.