Nothing
#' Create RDS samples with given characteristics
#'
#' @param net the network object from which to draw a sample
#' @param nnodes the number of nodes in the network [at least as default]
#' @param nsamp0 the number of seeds to be drawn (i.e. the size of the 0th wave of sampling)
#' @param fixinitial a variable that indicates the distribution from which to draw the initial seeds, if the seeds
#' variable is NULL and the seed.distribution variable is NULL
#' @param nsamp number of individuals in each RDS sample
#' @param replace sampling with replacement
#' @param coupons number of coupons
#' @param select not used
#' @param bias not used
#' @param rds.samp not used
#' @param seed.distribution a variable [what kind?] that indicates the distribution from which to draw the initial seeds
#' @param attrall Whether all the information about the sample should be returned [??]
#' @param trait.variable attribute of interest
#' @param nsims number of RDS samples to draw
#' @param seeds an array of seeds. Default is NULL, in which case the function draws the seeds from the nodes of the network.
#' @param prob.network.recall simulates the probability that an individual will remember any particular link
#' @param verbose Print verbose output
#' @return A list with the following elements:
#' nsample: vector of indices of sampled nodes
#' wsample: vector of waves of each sampled node
#' degsample: vector of degrees of sampled nodes
#' attrsample: vector of attrs of sampled nodes
#' toattr: vector of numbers of referrals to attrsd nodes
#' tonoattr: vector of number of referrans to unattrsd
#' nominators: recruiter of each sample
#' @export
rdssampleC <- function(net,
nnodes = network.size(net),
nsamp0,
fixinitial,
nsamp,
replace,
coupons,
select = NULL,
bias = NULL,
rds.samp = NULL,
seed.distribution = NULL,
attrall = FALSE,
trait.variable = "disease",
nsims = 1,
seeds = NULL,
prob.network.recall = 1,
verbose = TRUE) {
#via Coupons to the C function and minor changes there.
net <- as_network_uncompressed_rds(net)
attrs <- network::get.vertex.attribute(net, trait.variable)
if (is.null(attrs) || all(is.na(attrs))) {
stop(sprintf("No variable called %s appears in the data.", trait.variable))
}
degs <- sapply(net$iel, length) + sapply(net$oel, length)
if (is.logical(fixinitial[1]) &
fixinitial[1] == FALSE) {
fixinitial <- -1
}
#next piece of code seems to be modifying seed distribution
if (is.null(seed.distribution)) {
seed.distribution <- switch(
as.character(fixinitial[1]),
"-3" = {
rep(1, length = nnodes)
},
"-2" = {
# So attr(fixinitial,"table.seeds")=table(outcome[wave==0])
if (any(attrs == 1) &
any(attrs == 0)) {
table.seeds = attr(fixinitial, "table.seeds")
attrs * table.seeds[2] / sum(attrs ==
1) + (1 - attrs) * table.seeds[1] / sum(attrs == 0)
} else{
rep(1, length = nnodes)
}
},
"-1" = {
degs
},
"1" = {
attrs
},
"2" = {
attrs * degs
},
"3" = {
degs
},
degs
)
}
# Select seeds according to the seed distribution
if (is.null(seeds)) {
my.seeds <- c()
for (i in 1:nsims) {
temp.seeds <-
sample(
x = seq(along = seed.distribution),
size = nsamp0,
replace = FALSE,
prob = seed.distribution
)
my.seeds <- c(my.seeds, temp.seeds)
}
} else{
my.seeds <- seeds
}
if (sum(seed.distribution > 0) < nsamp0) {
if (verbose == TRUE) {
print(paste(
"No individuals available with positive probability of",
"being a seed."
))
}
}
n <- network.size(net)
target_num_recruits <- rep(coupons, n)
#code i wrote integrated into new function minorly modified.
Clist <- list(
n = n,
dir = FALSE,
bipartite = FALSE,
ndyads = n * (n - 1) / 2
)
e <-
as.edgelist(net, attrname = NULL) # Ensures that for undirected networks, tail<head
if (length(e) == 0) {
stop("The network is empty!")
}
if (!is.matrix(e)) {
e <- matrix(e, ncol = 2)
}
Clist$nedges <- dim(e)[1]
Clist$tails <- e[, 1]
Clist$heads <- e[, 2]
my.recruitedSampleAll <- c(rep(0, nsims * nsamp * 10))
my.recruitersAll <- c(rep(0, nsims * nsamp * 10))
my.recruitTimesAll <- c(rep(0, nsims * nsamp * 10))
my.recruitedSample <- c(rep(0, nsamp * 10))
my.recruiters <- c(rep(0, nsamp * 10))
my.recruitTimes <- c(rep(0, nsamp * 10))
max.log <- 15000
my.log <-
c(rep(" ", max.log))
my.seeds0 <-
c(rep(0, nsamp0)) #just added this (Maggie), also changed call to .C
log.length <- 0
#call to the C function
result <- .C(
"CRDSSample",
as.integer(Clist$tails),
as.integer(Clist$heads),
as.integer(Clist$nedges),
as.integer(Clist$n),
as.integer(Clist$dir),
as.integer(Clist$bipartite),
as.double(seed.distribution),
as.integer(my.recruitedSample),
as.integer(my.recruitedSampleAll),
as.integer(my.recruiters),
as.integer(my.recruitersAll),
as.double(my.recruitTimes),
as.double(my.recruitTimesAll),
as.character(my.log),
as.integer(target_num_recruits),
as.integer(my.seeds0),
as.integer(my.seeds),
as.integer(nsamp0),
as.integer(nsamp),
as.integer(nsims),
as.integer(verbose),
as.integer(max.log),
as.integer(log.length),
PACKAGE = "RDS"
)
init.deg <- degs[result[[9]]]
#simulates imperfect recall using the binomial distribution
if (prob.network.recall == 1) {
degree <- init.deg
} else {
degree <- c()
for (i in 1:length(init.deg)) {
degree <- c(degree, rbinom(1, init.deg[i], prob.network.recall))
}
}
allsamples <- list(
nsample = result[[9]],
nominators = result[[11]],
degsample = degree,
attrsample = attrs[result[[9]]],
time.recruited = result[[13]]
)
return(allsamples)
}
getsamples.RDS.C <- function(sim,
dummy,
M1,
M2,
N,
nsamp0,
fixinitial,
n,
coupons,
dummydis,
parallel = 0,
parallel.type = "PSOCK",
seed = 1,
seed.indices = NULL,
verbose = TRUE) {
maxdeg <- length(dummy) / 2 - 1
Msamples <- rep(0, length(dummy))
bisamples <- matrix(0, length(dummy), length(dummy))
sampbynet <- vector(M1, mode = "list")
bsdegsamples <- matrix(0, ncol = n, nrow = M1 * M2)
bsdissamples <- matrix(0, ncol = n, nrow = M1 * M2)
bsprev <- rep(0, M1 * M2)
parallel <- min(parallel, M1)
# parallel=1
if (!is.null(fixinitial)) {
if (is.logical(fixinitial[1]) &
fixinitial[1] == "FALSE") {
fixinitial <- -1
} #line added 0527
}
data <- list(
sim = sim,
N = N,
nsamp0 = nsamp0,
fixinitial = fixinitial,
M2 = M2,
dummy = dummy,
dummydis = dummydis,
maxdeg = maxdeg,
n = n,
coupons = coupons,
seed = seed,
seed.indices = seed.indices
)
sfn <- function(j, data) {
# SAN Next for sim version
# net<-as_network_uncompressed_rds(data$sim[[j]])
net <- as_network_uncompressed_rds(data$sim[[1]])
deg <- sapply(net$iel, length) + sapply(net$oel, length)
deg[deg > data$maxdeg] <- data$maxdeg
disease <- network::get.vertex.attribute(net, "disease")
popCounts <- tototab.RDS(deg, disease, data$dummy)
biCounts <- popCounts %*% t(popCounts)
seed.distribution <- switch(
as.character(data$fixinitial[1]),
"-3" = {
rep(1, length = network.size(net))
},
"-2" = {
# So attr(data$fixinitial,"table.seeds")=table(outcome[wave==0])
if (any(data$dissample == 1) &
any(disease == 0)) {
table.seeds = attr(data$fixinitial, "table.seeds")
disease * table.seeds[2] / sum(disease ==
1) + (1 - disease) * table.seeds[1] / sum(disease == 0)
} else{
rep(1, length = network.size(net))
}
},
"-1" = {
deg
},
"1" = {
disease
},
"2" = {
disease * deg
},
"3" = {
deg
},
deg
)
indices <- toindexref.RDS(deg, disease, data$dummy)
u <- sort(unique(data$seed.indices))
for (i in u) {
seed.distribution[indices == i] <- 1000
}
si <- rep(0, data$M2 * data$nsamp0)
for (k in 1:data$M2) {
si[(k - 1) * data$nsamp0 + (1:data$nsamp0)] <-
sample(
seq(along = seed.distribution),
size = data$nsamp0,
replace = FALSE,
prob = seed.distribution
)
for (i in u) {
if (any(indices == i)) {
if (sum(data$seed.indices == i) <= sum(indices == i)) {
si[(k - 1) * data$nsamp0 + (1:data$nsamp0)][data$seed.indices == i] <-
sample(
seq(along = indices)[indices == i],
size = sum(data$seed.indices == i),
replace = FALSE
)
} else{
si[(k - 1) * data$nsamp0 + (1:data$nsamp0)][data$seed.indices == i] <-
sample(
seq(along = indices)[abs(indices - i) < 2],
size = sum(data$seed.indices == i),
replace = FALSE
)
}
}
}
}
temp <-
rdssampleC(
net,
trait.variable = "disease",
nsamp0 = data$nsamp0,
fixinitial = data$fixinitial,
nsamp = data$n,
replace = FALSE,
coupons = data$coupons,
nsims = data$M2,
select = NULL,
bias = NULL,
seed.distribution = seed.distribution,
seeds = si
)
temp$degsample[temp$degsample > data$maxdeg] <- data$maxdeg
#counts of nodes of each type
disCount <- rep(0, data$N + 1)
Msamples <- rep(0, length(data$dummy))
bisamples <- matrix(0, length(data$dummy), length(data$dummy))
for (k in 1:data$M2) {
ind <- (k - 1) * data$n + (1:data$n)
tabsamp <-
tototab.RDS(temp$degsample[ind], temp$attrsample[ind], data$dummy)
disCount[sum(temp$attrsample[ind]) + 1] <-
disCount[sum(temp$attrsample[ind]) + 1] + 1
Msamples <- Msamples + tabsamp
mbym <- tabsamp %*% t(tabsamp) - diag(tabsamp)
bisamples <- bisamples + mbym
}
# Q <- (Msamples+data$n/data$N)/(data$M2*popCounts+1)
# QQ <- (bisamples+(data$n/data$N)^2)/((data$M2)^2 * biCounts+1)
Q <- (Msamples + 1) / (data$M2 * popCounts + 1)
# Q <- estpi(n,Msamples,popCounts,tabsamp,dummydis,disease,M2,Q)
QQ <- (bisamples + 1) / (data$M2 * biCounts + 1)
# Q[popCounts==0] <- 0
# QQ[ biCounts==0] <- 0
bsprev <- rep(0, data$M2)
temp2 <- matrix(0, length(data$dummy), length(data$dummy))
bsdegsamples <-
matrix(
temp$degsample,
ncol = data$n,
nrow = data$M2,
byrow = TRUE
)
bsdissamples <-
matrix(
temp$attrsample,
ncol = data$n,
nrow = data$M2,
byrow = TRUE
)
for (k in 1:data$M2) {
tabsamp <- tototab.RDS(bsdegsamples[k, ], bsdissamples[k, ], data$dummy)
popests <- tabsamp / Q
popests[Q == 0] <- 0
popests <- data$N * popests / sum(popests, na.rm = TRUE)
bsprev[k] <- sum(popests * data$dummydis, na.rm = TRUE) / data$N
}
#if(any(is.na(Q)) | any(is.na(bsprev))) {browser()}
#if(any(is.na(Q)) | any(is.na(bsprev))) {print(Q);print(bsprev)}
list(
Msamples = Msamples,
bisamples = bisamples,
temp2 = temp2,
Q = Q,
QQ = QQ,
bsprev = bsprev,
disCount = disCount,
bsdegsamples = bsdegsamples,
bsdissamples = bsdissamples
)
}
if (parallel > 1) {
# Run the jobs with rpvm or Rmpi
cl <-
beginparallel(parallel = parallel,
type = parallel.type,
seed = seed)
if (verbose) {
cat(paste("Starting parallel RDS samples in C\n"))
}
out <- parallel::clusterApplyLB(cl, as.list(1:M1), sfn, data)
if (verbose) {
cat(paste("Finished parallel RDS samples\n"))
}
endparallel(cl, type = parallel.type)
} else{
# non-parallel
out <- vector(mode = "list", length = M1)
if (verbose) {
cat(paste("Starting non-parallel RDS samples in C\n"))
}
for (j in 1:M1) {
out[[j]] <- sfn(j, data)
}
if (verbose) {
cat(paste("Finished non-parallel RDS samples in C\n"))
}
}
index <- 0
for (j in 1:M1) {
temp <- out[[j]]
bsdegsamples[index + (1:M2), ] <- temp$bsdegsamples
bsdissamples[index + (1:M2), ] <- temp$bsdissamples
bsprev[index + (1:M2)] <- temp$bsprev
if (j == 1) {
Q <- temp$Q / M1
QQ <- temp$QQ / M1
disCount <- temp$disCount
} else{
Q <- Q + temp$Q / M1
QQ <- QQ + temp$QQ / M1
disCount <- disCount + temp$disCount
}
index <- index + M2
Msamples <- Msamples + temp$Msamples
bisamples <- bisamples + temp$bisamples
sampbynet[[j]] <- temp$temp2
}
list(
Msamples = Msamples,
bisamples = bisamples,
sampbynet = sampbynet,
bsdegsamples = bsdegsamples,
bsdissamples = bsdissamples,
bsprev = bsprev,
Q = Q,
QQ = QQ,
disCount = disCount
)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.