#' generate random social interaction data
#'
#' @param no number of interactions to be created
#' @param ni number of individuals
#' @param beh character, \code{beh} determines behaviours for the "positive" interactions of \code{datagen}
#' @param behw numeric, weights of occurences of behaviours in \code{datagen}
#' @param behdur logical, whether behaviours have durations
#' @param biaswithin bias parameter WTIHIN dyads: if 1: roughly 66\% of directionality (biased towards the ID that comes first in alphabet); if 0: no bias, i.e. 50\% bias
#' @param friendprop proportion of dyads with preferential (more frequent/longer) interactions
#' @param nadd numeric, additional number of non-focal IDs
#' @param presence numeric of length = 2, (1) proportion of IDs affected by absence, (2) proportion of days (relative to date range) for absence
#'
#' @return list with six items: (1) data.frame with interaction data, (2) data.frame with observation time data, (3) data.frame with presence data, (4) character with "friend" dyads, (5) character with focal individuals, (6) non-focal individuals
#' @details The process of inventing interactions within these functions starts with creating a data collection protocol. This protocol is modelled on focal animal sampling, in which the \code{ni} individuals serve as focal animals, and it ends up with a list of "protocols" for a given focal ID on several dates associated with pseudo-randomized observation time (i.e. protocol duration). See examples...
#'
#' The argument \code{friendprop} introduces higher rates or longer durations of interaction in this proportion of dyads as compared to other dyads. So far, this applies only to focal-focal dyads and won't introduce focal-nonfocal "friends" even if \code{nadd} is not zero.
#' @export
#' @importFrom stats rexp rnorm rnorm runif
#' @importFrom utils combn
#'
#' @examples
#' x <- datagen(no=20, ni=3)
#' # observation time table
#' x$ot
#' # interactions
#' x$dataseq
datagen <- function(no=50, ni=5, beh=c("appr", "gro", "supp", "prox"), behw=c(0.2, 0.3, 0.1, 0.4), behdur = c(F, T, F, T), biaswithin=1, friendprop=0.1, nadd=0, presence=NULL) {
# create ID vector
if(ni+nadd <= 26) ids <- sort(sample(letters, ni+nadd))
if(ni+nadd > 26 & ni+nadd <= 324) {
pos <- apply(combn(letters, 2), 2, function(z)paste(c(z[1], z[2]), collapse=""))
# just make sure that "ot" or "in" dont show up in the ids ("id" doesnt show up because the pairs of letters are sorted anyway)
pos <- pos[-c(177, 264)]
ids <- sort(sample(pos, ni+nadd))
}
# and focals & non-focals
focals <- sort(sample(ids, ni))
nonfocals <- ids[!ids %in% focals]
# create presence matrix
# if not otherwise specified it is assumed that all IDs were present at all times
if(is.null(presence)) pmat <- createnullpresence(IDs=ids, from="2000-01-01", to=as.Date("2000-01-01")+no-1)
# and modify it if desired
if(!is.null(presence)) {
if(length(presence)==2 & max(presence)<=1 & min (presence)>0) {
pmat <- createnullpresence(IDs=ids, from="2000-01-01", to=as.Date("2000-01-01")+no-1, propID=presence[1], propabs=presence[2])
}else{
pmat <- createnullpresence(IDs=ids, from="2000-01-01", to=as.Date("2000-01-01")+no-1)
warning("input for presence doesnt make sense, therefore full presence matrix used...", call.=FALSE)
}
}
dts <- sort(sample(1:no,no*0.6))
ppmat <- pmat[dts, ]
# possible follows...
posfol <- apply(ppmat[, focals], 1, function(z) colnames(ppmat[,focals])[z==1])
if (inherits(posfol, "matrix")) posfol <- lapply(apply(posfol, 2, list), unlist)
# limit to three follows per day
posfoln <- unlist(lapply(posfol, length)); posfoln[posfoln>3] <- 3
# create OT table for focals
# pool of observation times to sample from (based on 60min protocols with some jitter below that number)
ots <- sort(round(rnorm(500, 60, 10))); ots <- ots[ots<=60]; ots <- c(ots, rep(60, length(ots)))
ot <- matrix(ncol=3, nrow=0, ""); ct <- 1
i=names(posfol)[1]
for(i in names(posfol)) {
p <- sample(posfol[[as.character(i)]], posfoln[ct])
for(k in p) if(k %in% focals) ot <- rbind(ot, c(k, sample(ots, 1), i))
ct <- ct + 1
}
ot <- data.frame(focal=ot[,1], date=pmat$date[as.numeric(ot[,3])], OT=as.numeric(ot[,2]))
# create list with focals' preferred partners (here would also be the place to introduce directionality...)
# number of dyads with focals as one ID
pt <- matrix(nrow=length(focals), ncol=length(ids)-1, ""); rownames(pt) <- focals
ptw <- matrix(nrow=length(focals), ncol=length(ids)-1, 1); rownames(ptw) <- focals
for(f in focals) { pt[f, ] <- ids[ids!=f] }
# if(friendprop>0) {
ndyads <- length(pt)/2;
fdyads <- round(ndyads * friendprop)
if(fdyads>=1) {
p <- sample(1:ndyads, fdyads); cords <- expand.grid( 1:nrow(pt), 1:ncol(pt))
m=p[1]
prefdyads <- c()
for(m in p) {
ptw[m] <- 3
fs <- rownames(pt)[cords[m, 1]]
ps <- pt[m]
if(ps %in% focals) ptw[ps, pt[ps, ]==fs] <- 3 # this is the directionality step...
prefdyads <- c(prefdyads, paste(sort(c(fs, ps)), collapse="@"))
}
} else {
prefdyads <- NA
}
# }
# # add non-focals if existing
# if(length(nonfocals)>0) {
# for(i in 1:length(nonfocals)) {
# pt <- cbind(pt, nonfocals[i])
# ptw <- cbind(ptw, 1)
# }
# }
#
# generate the interactions...
res <- data.frame(date=rep(pmat[1,1], no), focal=factor(no, focals), actor=factor(no, ids), receiver=factor(no, ids), beh=factor(no, beh), dur=numeric(no), pref=FALSE, ct=NA)
ct=0
while(sum(!is.na(res$ct)) < no) {
i <- min(which(is.na(res$ct)))
# select a random protocol
prot <- ot[sample(1:nrow(ot), 1), ]; foc <- as.character(prot[1,1]); res$date[i] <- prot$date
# select a partner for the focal (with preference for preferred partners)
# accounting for presence and also for the possibility that non-focal ids are considered...
p <- pmat[pmat$date==prot$date, ids[-c(which(ids==as.character(prot$focal)))]]
(p <- names(p)[p==1])
if(length(p)>=1) {
part <- sample(pt[foc, pt[foc,] %in% p], 1, prob=ptw[foc, pt[foc,] %in% p])
# introduce the bias concerning actor/receiver
d <- sort(c(as.character(foc), part)); if(rnorm(1) > biaswithin) {d <- rev(d)}
# choose behaviour
b <- sample(1:length(beh), 1, prob=behw)
# and assign duration
dur <- ceiling(rexp(1, 0.02))
# for preferred dyads, increase the duration
if(ptw[as.character(foc), pt[as.character(foc),]==part] > 1) { dur <- round(dur * runif(1, 2.5, 4.5)); pref <- T }else{pref<-F}
# depending on behaviour make duration NA
if(!behdur[b]) dur <- NA
res$focal[i] <- foc
res$actor[i] <- d[1]
res$receiver[i] <- d[2]
res$beh[i] <- beh[b]
res$dur[i] <- dur
res$pref[i] <- pref
res$ct[i] <- 9999
ct <- ct + 1
}
}
# just checking whether at least one of the interactors was a focal (should be TRUE)
# sum(apply(res, 1, function(z) z[3] %in% focals | z[4] %in% focals))==no
# check whether sometimes both ids were among focal ids (should be 1s AND 2s)
# apply(res, 1, function(z) sum(c(z[3] %in% focals, z[4] %in% focals)))
# sort interactions by date...
res <- res[order(res$date), -c(which(colnames(res)=="ct"))]; rownames(res) <- NULL
# and finally split the presence for focals and nonfocals
# NO, don't! single presence enough
#if(nadd>0) { presence2 <- pmat[, c("date", nonfocals)];presence <- pmat[, c("date", focals)]
#} else { presence <- pmat; presence2 <- NULL }
return(list(dataseq=res, ot=ot, presence=pmat, prefdyads=prefdyads, focals=focals, nonfocs=nonfocals))
#return(list(dataseq=res, ot=ot, presence=presence, presence2=presence2, prefdyads=prefdyads, focals=focals, nonfocs=nonfocals))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.