library(knitr)
opts_chunk$set(eval=FALSE)
library(igraph)
library(dplyr)
library(tidyr)
library(stringr)

library(nrsimulatr)

Two-parameter model

set.seed(12345)

Now set up the params for one random draw.

We'll set this up to explore what happens when we change $\rho$, the relative probability of an edge between a hidden population member and a non-hidden population member. We'll hold all of the other parameters fixed:

We first use expand.grid to build a dataframe containing all of the combinations of the parameters we will explore with the simulation.

sim.params <- expand.grid(# size of total population, U
                          N=10e3,
                          # fraction of the population that is in the frame population (F)
                          p.F=.5,
                          # fraction of the population that is in the hidden popn (H)
                          p.H=.03,
                          # probability of being in F, conditional on being in H
                          p.F.given.H=1,
                          # prob of edge between two nodes in the same group 
                          zeta=.05,
                          # relative prob of edge between hidden and not hidden 
                          rho=c(.5, .8, 1),
                          # relative prob of edge between frame and not frame
                          xi = 0.6,
                          # sampling fraction for simple random sample w/out replacement from F
                          frame.sampling.frac=.3,
                          # sampling fraction for relative probability sample from H
                          hidden.sampling.frac=.4,
                          # true positive rate
                          tau=.8)

# add an id for each parameter combination
sim.params$param.index <- 1:nrow(sim.params)

Next, we take our dataframe and turn it into a list of rows; then, for each entry in the list of rows, we call the function required to construct a simulation parameters object.

all.param.list <- split(sim.params, rownames(sim.params))

# we would change 'type' if we wanted to use a different type of
# simulation model -- for example, a one-parameter stochastic blockmodel with four groups
all.param.list <- plyr::llply(all.param.list,
                              sbm_params,
                              type="4group_2param")

Now we conduct the actual simulation.

## we'll repeat each set of parameter combinations this number of times
## (small here to keep this reasonably fast; in practice, you probably want many more)
num.sims <- 2

sim.ests <- plyr::ldply(all.param.list,
             function(these.sim.params) {

                # construct an object containing the parameters that govern
                # reporting (in our case, this will use our tau parameter to
                # simulate false negatives)
                rep.params <- imperfect_reporting(these.sim.params)

                # construct an object containing the parameters that govern
                # sampling from the frame population
                frame.sample.params <- frame_srs(list(sampling.frac=
                                                      these.sim.params$frame.sampling.frac))

                # construct an object containing the parameters that govern
                # sampling from the hidden population
                hidden.sample.params <- hidden_relprob(list(sampling.frac=
                                                            these.sim.params$hidden.sampling.frac,
                                                            inclusion.trait='d.degree'))

                this.N <- these.sim.params$N

                ## this goes through each repetition for the parameter set we're on
                these.res <- plyr::ldply(1:num.sims,
                                   function(this.idx) {

                                     # draw a random graph from the stochastic block model
                                     this.g <- generate_graph(these.sim.params)

                                     # use the reporting parameters to convert the
                                     # undirected social network into a directed
                                     # reporting graph
                                     this.r <- reporting_graph(rep.params, this.g)

                                     # get some quantities about the entire reporting graph
                                     # from a census
                                     census.df <- sample_graph(entire_census(), this.r)
                                     #res <- sbm_census_quantities(census.df)

                                     res <- list()

                                     # get sample-based estimates from
                                     # a sample of the frame population and a sample
                                     # of the hidden population
                                     frame.df <- sample_graph(frame.sample.params,
                                                              this.r)
                                     hidden.df <- sample_graph(hidden.sample.params,
                                                               this.r)

                                     # note that we will assume that the degrees
                                     # are correct (ie, we are not simulating the
                                     # known population method)
                                     res$sample.y.F.H <- with(frame.df,
                                                              sum((y.FH + y.notFH)*
                                                                  sampling.weight))

                                     res$sample.d.F.U <- with(frame.df,
                                                              sum(d.degree*sampling.weight))

                                     res$sample.dbar.F.F <- with(frame.df,
                                                                 sum((d.FH + d.FnotH)*
                                                                     sampling.weight)/
                                                                 sum(sampling.weight))

                                     res$sample.vbar.H.F <- with(hidden.df,
                                                              sum((v.FnotH + v.FH)*
                                                                  sampling.weight)/
                                                              sum(sampling.weight))

                                     res$sample.basic <- with(res,
                                                              (sample.y.F.H / sample.d.F.U)*
                                                              this.N)

                                     res$sample.adapted <- with(res,
                                                                sample.y.F.H / sample.dbar.F.F)

                                     res$sample.generalized <- with(res,
                                                                    sample.y.F.H / sample.vbar.H.F)

                                     # for convenience, keep track of which rep this is
                                     res$rep <- this.idx

                                     return(data.frame(res))
                                   })

                ## for convenience, keep track of which parameter set this is
                these.res$param.index <- these.sim.params$param.index

                return(these.res)
             })

Finally, summarize the results

ci_low <- function(x) { quantile(x, .025, na.rm=TRUE) }
ci_high <- function(x) { quantile(x, .975, na.rm=TRUE) }

sim.agg.byest <- sim.ests %>%
                 group_by(param.index) %>%
                 rename(basic=sample.basic, 
                        adapted=sample.adapted, 
                        generalized=sample.generalized) %>%
                 summarise_each(funs(mean, ci_low, ci_high),
                                basic, adapted, generalized) %>%
                 gather(qty, value,
                        matches("basic|adapted|generalized")) %>%
                 separate(qty, c("estimator", "qty"), sep="\\_", extra="merge") %>%
                 spread(qty, value)

# merge in parameter values
sim.params <- sim.params %>% mutate(N.H = p.H * N)

sim.agg.byest <- inner_join(sim.agg.byest, sim.params, by='param.index')

Plot the bias

sim.agg.byest <- sim.agg.byest %>% mutate(bias = mean - N.H) 

bias.tab <- sim.agg.byest %>% select(estimator, rho, bias) %>%
                              spread(estimator, bias)

kable(bias.tab)


dfeehan/nrsimulatr documentation built on Feb. 27, 2024, 3:18 a.m.