Nothing
#' Compile List of Network Edges from a Contact Table
#'
#' This function takes the output from contactDur.all or contactDur.area and
#' generates a data frame showing the list of edges in the contact network.
#'
#' @param x Output from the contactDur.all or contactDur.area functions. Can be
#' either a data frame or list of data frames.
#' @param importBlocks Logical. If true blocks will be carried over from x
#' input, allowing for time-ordered and time-aggregated network creation.
#' Defaults to FALSE.
#' @param removeDuplicates Logical. If x is from contactDur.all, to-from node
#' pairs in output will be reported twice (i.e., nodes will be listed as
#' both a to- and a from-node). If removeDuplicates == true, duplicated
#' edges are removed. Defaults to TRUE.
#' @param parallel Logical. If TRUE, sub-functions within the ntwrkEdges
#' wrapper will be parallelized. Note that the only sub-functions
#' parallelized here are called ONLY when importBlocks == TRUE, or when x is
#' a list of data frames.
#' @param nCores Integer. Describes the number of cores to be dedicated to
#' parallel processes. Defaults to half of the maximum number of cores
#' available (i.e., (parallel::detectCores()/2)).
#' @keywords data-processing contact
#' @return Output is a data frame with the following columns, and can easily
#' be used as input for igraph functions.
#'
#' \item{from}{The "from" nodes in a contact network. Can also be considered
#' "tail" nodes.}
#' \item{to}{The "to" nodes in a contact network. Can also be considered
#' "head" nodes.}
#' \item{durations}{The duration of each contact reported in \code{x}.}
#' \item{block}{(if applicable) time block during which reported contacts
#' occurred.}
#'
#' @import igraph
#' @examples
#' data("calves")
#'
#' calves.dateTime<-datetime.append(calves, date = calves$date,
#' time = calves$time) #create a dataframe with dateTime identifiers for location fixes.
#'
#' calves.agg<-tempAggregate(calves.dateTime, id = calves.dateTime$calftag,
#' dateTime = calves.dateTime$dateTime, point.x = calves.dateTime$x,
#' point.y = calves.dateTime$y, secondAgg = 300, extrapolate.left = FALSE,
#' extrapolate.right = FALSE, resolutionLevel = "reduced", parallel = FALSE,
#' na.rm = TRUE, smooth.type = 1) #smooth to 5-min fix intervals.
#'
#' calves.dist<-dist2All_df(x = calves.agg, parallel = FALSE,
#' dataType = "Point", lonlat = FALSE) #calculate inter-animal distances at each timepoint.
#'
#' calves.contact.NOblock<-contactDur.all(x = calves.dist, dist.threshold=1,
#' sec.threshold=10, blocking = FALSE, equidistant.time = FALSE,
#' parallel = FALSE, reportParameters = TRUE)
#'
#' calves.edges<-ntwrkEdges(x =calves.contact.NOblock, importBlocks = FALSE,
#' removeDuplicates = TRUE)
#'
#' calves.network1 <- igraph::graph_from_data_frame(d=calves.edges,
#' directed=FALSE)
#'
#' igraph::V(calves.network1)$color<- "orange1"
#' igraph::V(calves.network1)$size <-13
#' igraph::E(calves.network1)$width <- calves.edges$duration
#' igraph::E(calves.network1)$color <- "black"
#' igraph::plot.igraph(calves.network1, vertex.label.cex=0.4,
#' layout = igraph::layout.circle, main = "Inter-Calf Contacts") #plot the network
#'
#' @import foreach igraph
#' @export
ntwrkEdges<-function(x, importBlocks = FALSE, removeDuplicates = TRUE, parallel = FALSE, nCores = (parallel::detectCores()/2)){
#bind the following variables to the global environment so that the CRAN check doesn't flag them as potential problems
i <- NULL
j <- NULL
k <- NULL
dupAction<-removeDuplicates
par = parallel
cores = nCores
#write the sub-functions
summarizeContacts<- function(x, importBlocks, avg = FALSE, parallel, nCores){
#bind the following variables to the global environment so that the CRAN check doesn't flag them as potential problems
i <- NULL
#write sub-functions
summaryAgg.block<-function(x,y){ #calculates the mean potential contacts by id and block. Using this apply function is faster than simply aggregating the data set by id and block
sumTable<-y[which(y$block == unname(unlist(x[1]))),]
if(nrow(sumTable) == 0){output <- NULL #if there's nothing in the subset, the function will not proceed any further.
}else{
blockStart<- unique(lubridate::as_datetime(sumTable$block.start)) #added 02/05/2019 - had to keep track of this new information ; updated 06/02/2019 - converted the factor data to POSIXct format in order to avoid a "length is too large for hashing" error.
blockEnd<- unique(lubridate::as_datetime(sumTable$block.end)) #added 02/05/2019 - had to keep track of this new information ; updated 06/02/2019 - converted the factor data to POSIXct format in order to avoid a "length is too large for hashing" error.
blockNum<- unique(sumTable$numBlocks)
sumTable.redac<-sumTable[,-c(match("id", names(sumTable)), match("block", names(sumTable)), match("block.start", names(sumTable)), match("block.end", names(sumTable)), match("numBlocks", names(sumTable)))] #Remove the columns that cannot/shoud not be averaged.
output<-stats::aggregate(sumTable.redac, list(id = sumTable$id), mean) #this not only calculates the mean of each column by id, but also adds the "id" column back into the data set.
output$block = unname(unlist(x[1])) #add this information back into the table
output$block.start = blockStart #add this information back into the table
output$block.end = blockEnd #add this information back into the table
output$numBlocks = blockNum #add this information back into the table
}
return(output)
}
summary.generator<-function(x, importBlocks, parallel, nCores){
blockSum <-function(x,y, indivSeq, areaSeq){
blockDurFrame<-y[which(y$block == unname(unlist(x[1]))),]
indivSeqFrame <- data.frame(indivSeq, stringsAsFactors = TRUE)
summary.contacts<-apply(indivSeqFrame, 1, contSum, blockDurFrame, indivSeq, areaSeq)
indivSum.full<- data.frame(data.table::rbindlist(summary.contacts), stringsAsFactors = TRUE)
indivSum.full$block <- unname(unlist(x[1]))
#added 02/05/2019 - to maintain this new information created in the newest version of the contactDur functions.
indivSum.full$block.start <- unique(lubridate::as_datetime(blockDurFrame$block.start)) # updated 06/02/2019 - converted the factor data to POSIXct format in order to avoid a "length is too large for hashing" error.
indivSum.full$block.end <- unique(lubridate::as_datetime(blockDurFrame$block.end)) # updated 06/02/2019 - converted the factor data to POSIXct format in order to avoid a "length is too large for hashing" error.
indivSum.full$numBlocks <- unique(blockDurFrame$numBlocks)
return(indivSum.full)
}
contSum <-function(x,y, indivSeq, areaSeq){
me = (unname(unlist(x[1])))
if(length(y$dyadMember1) > 0){ #This essentially determines if the input was created with dist.all or distToArea. If length(dyadMember1) >0, it was created with dist.all
indivContact1 <- y[c(which(as.character(y$dyadMember1) == me)),] #had to make this as.character b/c "me" is a factor with levels that may be different than y$dyadMember1
indivContact2 <- y[c(which(as.character(y$dyadMember2) == me)),] #had to make this as.character b/c "me" is a factor with levels that may be different than y$dyadMember2
}else{
indivContact1 <- y[c(which(as.character(y$indiv.id) == me)),]
indivContact2 <- matrix(nrow=0,ncol=0)
}
#Here identify the number of contact durations individuals had with others. How we do this is determined by the number of times individuals appear in y's dyadMember1 and dyadMember2 columns. Note that the only option if the function input originated from contactDur.area is (nrow(indivContact1) >= 1) & (nrow(indivContact2) == 0)
if((nrow(indivContact1) >= 1) & (nrow(indivContact2) >= 1)){
indivContact.full <- data.frame(data.table::rbindlist(list(indivContact1,indivContact2)), stringsAsFactors = TRUE)
specIndivSeq = unique(c(as.character(indivContact.full$dyadMember1),as.character(indivContact.full$dyadMember2))) #had to add as.character call because contactDur functions now produce factor data. 02/05/2019
specIndivSeq1 = specIndivSeq[-which(specIndivSeq == me)]
}
if((nrow(indivContact1) >= 1) & (nrow(indivContact2) == 0)){
indivContact.full <- indivContact1
if(length(y$dyadMember1) > 0){ #This essentially determines if the input was created with dist.all or distToArea. If length(dyadMember1) >0, it was created with dist.all
specIndivSeq = unique(c(as.character(indivContact.full$dyadMember1),as.character(indivContact.full$dyadMember2))) #had to add as.character call because contactDur functions now produce factor data. 02/05/2019
specIndivSeq1 = specIndivSeq[-which(specIndivSeq == me)]
}else{
specIndivSeq1 = unique(as.character(indivContact.full$area.id)) #had to add as.character call because contactDur functions now produce factor data. 02/05/2019
}
}
if((nrow(indivContact2) >= 1) & (nrow(indivContact1) == 0)){
indivContact.full <- indivContact2
specIndivSeq = unique(c(as.character(indivContact.full$dyadMember1),as.character(indivContact.full$dyadMember2))) #had to add as.character call because contactDur functions now produce factor data. 02/05/2019
specIndivSeq1 = specIndivSeq[-which(specIndivSeq == me)]
}
if((nrow(indivContact2) == 0) & (nrow(indivContact1) == 0)){
indivContact.full <- indivContact1 #if neither indivContact1 or indivContact2 have any rows, indivContact.full won't have any rows either.
specIndivSeq1 = 0
}
if(length(y$dyadMember1) > 0){ #This essentially determines if the input was created with dist.all or distToArea. If length(dyadMember1) >0, it was created with dist.all
if(nrow(indivContact.full) > 1){
indivSeqFrame1 <-data.frame(indivSeq, stringsAsFactors = TRUE)
contactSum<-apply(indivSeqFrame1, 1, distributeContacts1, indivContact.full, me)
sumTable <- data.frame(matrix(ncol = (3+length(indivSeq)), nrow = 1), stringsAsFactors = TRUE)
colnames(sumTable) <- c("id","totalDegree","totalContactDurations", paste("contactDuration_Indiv",indivSeq, sep = ""))
sumTable$id = me
sumTable$totalDegree <- length(specIndivSeq1)
sumTable$totalContactDurations = sum(indivContact.full$contactDuration)
sumTable[1,4:ncol(sumTable)] <- contactSum
sumTable[,match(paste("contactDuration_Indiv",me, sep = ""), names(sumTable))] = NA
}else{ #if nrow !>0
if(nrow(indivContact.full) == 1){
sumTable <- data.frame(matrix(ncol = (3+length(indivSeq)), nrow = 1), stringsAsFactors = TRUE)
colnames(sumTable) <- c("id","totalDegree","totalContactDurations", paste("contactDuration_Indiv",indivSeq, sep = ""))
sumTable$id = me
sumTable$totalDegree <- 1
sumTable$totalContactDurations = indivContact.full$contactDuration
sumTable[1,4:ncol(sumTable)] <- 0
sumTable[,match(paste("contactDuration_Indiv",specIndivSeq1, sep = ""), names(sumTable))] = indivContact.full$contactDuration
sumTable[,match(paste("contactDuration_Indiv",me, sep = ""), names(sumTable))] = NA
}
if(nrow(indivContact.full) == 0){
sumTable <- data.frame(matrix(ncol = (3+length(indivSeq)), nrow = 1), stringsAsFactors = TRUE)
colnames(sumTable) <- c("id","totalDegree","totalContactDurations", paste("contactDuration_Indiv",indivSeq, sep = ""))
sumTable$id = me
sumTable[1,2:ncol(sumTable)] <- 0
sumTable[,match(paste("contactDuration_Indiv",me, sep = ""), names(sumTable))] = NA
}
}
}else{ # length(y$dyadMember1) == 0
if(nrow(indivContact.full) > 1){
areaSeqFrame <- data.frame(areaSeq, stringsAsFactors = TRUE)
contactSum<-apply(areaSeqFrame, 1, distributeContacts2, indivContact.full)
sumTable <- data.frame(matrix(ncol = (3+length(areaSeq)), nrow = 1), stringsAsFactors = TRUE)
colnames(sumTable) <- c("id","totalDegree","totalContactDurations", paste("contactDuration_Area_",areaSeq, sep = ""))
sumTable$id = me
sumTable$totalDegree <- length(specIndivSeq1)
sumTable$totalContactDurations = sum(indivContact.full$contactDuration)
sumTable[1,4:ncol(sumTable)] <- contactSum
}else{ #if nrow !>1
if(nrow(indivContact.full) == 1){
areaVec <- unique(y$area.id)
sumTable <- data.frame(matrix(ncol = (3+length(areaSeq)), nrow = 1), stringsAsFactors = TRUE)
colnames(sumTable) <- c("id","totalDegree","totalContactDurations", paste("contactDuration_Area_",areaSeq, sep = ""))
sumTable$id = me
sumTable$totalDegree <- 1
sumTable$totalContactDurations = indivContact.full$contactDuration
sumTable[1,4:ncol(sumTable)] <- 0
sumTable[,match(paste("contactDuration_Area_",areaVec, sep = ""), names(sumTable))] = indivContact.full$contactDuration
}
if(nrow(indivContact.full) == 0){
sumTable <- data.frame(matrix(ncol = (3+length(areaSeq)), nrow = 1), stringsAsFactors = TRUE)
colnames(sumTable) <- c("id","totalDegree","totalContactDurations", paste("contactDuration_Area_",areaSeq, sep = ""))
sumTable$id = me
sumTable[1,2:ncol(sumTable)] <- 0
}
}
}
return(sumTable)
}
distributeContacts1<- function(x,y, me){
if(unname(unlist(x[1])) == me){
spec.durations = 0
}else{
contact1 <- y[c(which(as.character(y$dyadMember1) == unname(unlist(x[1])))),]
contact2 <- y[c(which(as.character(y$dyadMember2) == unname(unlist(x[1])))),]
if((nrow(contact1) >= 1) & (nrow(contact2) >= 1)){
contact.full <- data.frame(data.table::rbindlist(list(contact1,contact2)), stringsAsFactors = TRUE)
}
if((nrow(contact1) >= 1) & (nrow(contact2) == 0)){
contact.full <- contact1
}
if((nrow(contact2) >= 1) & (nrow(contact1) == 0)){
contact.full <- contact2
}
if((nrow(contact2) == 0) & (nrow(contact1) == 0)){
contact.full <- contact1 #if neither contact1 or contact2 have any rows, contact.full won't have any rows either.
}
spec.durations <- ifelse(nrow(contact.full) >= 1, sum(contact.full$contactDuration),0)
}
return(spec.durations)
}
distributeContacts2<- function(x,y){
contact.full <- y[c(which(y$area.id == unname(unlist(x[1])))),]
spec.durations <- ifelse(nrow(contact.full) >= 1, sum(contact.full$contactDuration),0)
return(spec.durations)
}
if(importBlocks == TRUE){
if(length(x$dyadMember1) > 0){ #This essentially determines if the input was created with dist.all or distToArea. If length(dyadMember1) >0, it was created with dist.all
x<-x[order(x$block,x$dyadMember1,x$dyadMember2),]
indivVec <- c(as.character(x[,match("dyadMember1", names(x))]), as.character(x[,match("dyadMember2", names(x))]))
areaSeq = NULL
}else{
x<-x[order(x$block,x$indiv.id),]
indivVec <- x[,match("indiv.id", names(x))]
areaVec <- x[,match("area.id", names(x))]
areaVec <- areaVec[order(areaVec)] #forces the data type to become character so that there will be no issues with apply functions later.
areaSeq<-as.character(unique(areaVec))
}
indivSeq <- unique(indivVec)
indivSeq<-indivSeq[order(indivSeq)]
indivSeq<-as.character(indivSeq) #forces the data type to become character so that there will be no issues with apply functions later.
if(parallel == TRUE){
cl <- parallel::makeCluster(nCores)
doParallel::registerDoParallel(cl)
on.exit(parallel::stopCluster(cl))
summary.block<- foreach::foreach(i = unique(as.character(x$block))) %dopar% blockSum(i, x, indivSeq, areaSeq)
}else{ #if parallel == FALSE
blockVecFrame <- data.frame(unique(as.character(x$block)), stringsAsFactors = TRUE)
summary.block <- apply(blockVecFrame, 1, blockSum, x, indivSeq, areaSeq) #according to Dan, this apply function is faster than parApply, so I've removed the parApply option 1/17
}
summaryTable<- data.frame(data.table::rbindlist(summary.block), stringsAsFactors = TRUE)
summaryTable<-summaryTable[order(as.numeric(as.character(summaryTable$block)),summaryTable$id),]
}else{ #importBlocks == FALSE
if(length(x$dyadMember1) > 0){ #This essentially determines if the input was created with dist.all or distToArea. If length(dyadMember1) >0, it was created with dist.all
x<-x[order(x$dyadMember1,x$dyadMember2),]
indivVec <- c(as.character(x[,match("dyadMember1", names(x))]), as.character(x[,match("dyadMember2", names(x))])) #as.character forces the data type to become character so that there will be no issues with apply functions later.
areaSeq = NULL
}else{
x<-x[order(x$indiv.id),]
indivVec <- x[,match("indiv.id", names(x))]
areaVec <- x[,match("area.id", names(x))]
areaVec <- areaVec[order(areaVec)] #forces the data type to become character so that there will be no issues with apply functions later.
areaSeq<-as.character(unique(areaVec))
}
indivSeq <- unique(indivVec)
indivSeq<-indivSeq[order(indivSeq)]
indivSeq<-as.character(indivSeq) #forces the data type to become character so that there will be no issues with apply functions later.
indivSeqFrame <- data.frame(indivSeq, stringsAsFactors = TRUE)
summary.contacts <- apply(indivSeqFrame, 1, contSum, x, indivSeq, areaSeq) #according to Dan, this apply function is faster than parApply, so I've removed the parApply option 1/17
summaryTable<- data.frame(data.table::rbindlist(summary.contacts), stringsAsFactors = TRUE)
summaryTable<-summaryTable[order(summaryTable$id),]
}
return(summaryTable)
}
if(is.data.frame(x) == FALSE & is.list(x) == TRUE){
summaryList<-lapply(x, summary.generator, importBlocks, parallel, nCores) #changed to lapply 02/02/2019
if(avg == TRUE){
full.summary<- data.frame(data.table::rbindlist(summaryList, fill = TRUE), stringsAsFactors = TRUE) #Now we need to average the number of contacts by id and block
idSeq<-unique(full.summary$id)
if(importBlocks == TRUE){
blockSeq<-unique(full.summary$block)
sumTab <- apply(data.frame(blockSeq, stringsAsFactors = TRUE), 1, summaryAgg.block, y = full.summary)
sumTab.agg <- data.frame(data.table::rbindlist(sumTab), stringsAsFactors = TRUE)
}else{ #if importBlocks == FALSE
sumTab.agg<-stats::aggregate(full.summary[,-match("id", colnames(full.summary))], list(id = full.summary$id), mean) #this not only calculates the mean of each column by id, but also adds the "id" column back into the data set.
}
summary.output<-list(sumTab.agg, summaryList)
names(summary.output)<-c("avg.","contactSummaries.")
}else{ #if avg == FALSE
summary.output<- summaryList
}
}else{ #if x is NOT a list
summary.output <- summary.generator(x, importBlocks, parallel, nCores)
}
return(summary.output)
}
edgeGenerator.noBlock<-function(x, removeDuplicates = dupAction, par = parallel, cores = nCores){
confirm_edges.noBlock<-function(x,y){ #x = potential edges, y = contactSummary; essentially presents contact summary results in long form
if(length(levels(unname(unlist(x[1])))) > 1){ #This has to be here to avoid an error when trying to coerce output into the out.frame
x1.id <- droplevels(unname(unlist(x[1])))
}else{ #if the number of levels <= 1
x1.id <-unname(unlist(x[1]))
}
if(length(levels(unname(unlist(x[2])))) > 1){ #This has to be here to avoid an error when trying to coerce output into the out.frame
x2.id <- droplevels(unname(unlist(x[2])))
}else{ #if the number of levels <= 1
x2.id <-unname(unlist(x[2]))
}
out.frame<-data.frame(from = x1.id, to = x2.id, stringsAsFactors = TRUE) #must be made into a data frame to avoid the "listCoercing LHS to a list" warning.
y.ContactNames<-c(NA,NA,NA,substring((names(y[grep("contactDuration_", names(y))])),22)) #The three NAs represent the first 3 columns in y, which are irrelevent for our needs.
duration <- unname(unlist(y[which(y$id == x1.id), which(y.ContactNames == x2.id)]))
duration.corrected<-ifelse(duration > 0, duration, NA)
out.frame$durations<-duration.corrected #this is the total number of durations individuals were observed in contact with others
return(out.frame)
}
contactSummary<-summarizeContacts(x, importBlocks = FALSE, parallel = par, nCores = cores) #generate a summary of the contact table using the summarizeContacts function (available in the package as a stand-alone function)
if(is.data.frame(contactSummary) == FALSE & is.list(contactSummary) == TRUE){
if (parallel == TRUE){
cl <- parallel::makeCluster(nCores)
doParallel::registerDoParallel(cl)
on.exit(parallel::stopCluster(cl))
confirmed_edges.list <- foreach::foreach(k = 1:length(contactSummary), .packages = "foreach") %dopar% {
contactSummary.frame <- contactSummary[[k]]
contactSummary.node1 <- unique(contactSummary.frame$id)
contactSummary.node2 <- substring((names(contactSummary.frame[grep("contactDuration_", names(contactSummary.frame))])),22) #pulls out the contacted IDs
potential_edges <- expand.grid(contactSummary.node1, contactSummary.node2, stringsAsFactors = TRUE) #create a data frame detailing all the potential edges that may have occurred in the dataset (including loops, but loops will ultimately be removed later).
names(potential_edges) <- c("from", "to")
potential.ntwrk <- igraph::simplify(igraph::graph_from_data_frame(potential_edges, directed = FALSE), remove.multiple = TRUE) #create a simplified network object
potential_edges <- igraph::as_data_frame(potential.ntwrk) #convert back to data frame
edgelist<-apply(potential_edges,1,confirm_edges.noBlock, y=contactSummary.frame) #confirm whether edges existed or not. If "durations" is NA, then no edge existed.
edgeFrame<-data.frame(data.table::rbindlist(edgelist), stringsAsFactors = TRUE)
confirmed_edges <- edgeFrame[is.na(edgeFrame$duration) == FALSE,] #So now we have a data frame detailing the undirected edges we observed and the number of contacts associated with them.
if(removeDuplicates == FALSE){ #if the user wants duplicate edges, we bind confirmed_edges to a reflected version of itself
confirmed_edges.reflected <- confirmed_edges
confirmed_edges.reflected[,c(1,2)] <- confirmed_edges.reflected[,c(2,1)] #flip to/from nodes
confirmed_edges <- data.frame(data.table::rbindlist(list(confirmed_edges, confirmed_edges.reflected))) #update confirmed_edges
}
rownames(confirmed_edges)<-seq(1,nrow(confirmed_edges))
return(confirmed_edges)
}
}else{ #if parallel == FALSE
confirmed_edges.list <- foreach::foreach(k = 1:length(contactSummary), .packages = "foreach") %do% {
contactSummary.frame <- contactSummary[[k]]
contactSummary.node1 <- unique(contactSummary.frame$id)
contactSummary.node2 <- substring((names(contactSummary.frame[grep("contactDuration_", names(contactSummary.frame))])),22) #pulls out the contacted IDs
potential_edges <- expand.grid(contactSummary.node1, contactSummary.node2, stringsAsFactors = TRUE) #create a data frame detailing all the potential edges that may have occurred in the dataset (including loops, but loops will ultimately be removed later).
names(potential_edges) <- c("from", "to")
potential.ntwrk <- igraph::simplify(igraph::graph_from_data_frame(potential_edges, directed = FALSE), remove.multiple = TRUE) #create a simplified network object
potential_edges <- igraph::as_data_frame(potential.ntwrk) #convert back to data frame
edgelist<-apply(potential_edges,1,confirm_edges.noBlock, y=contactSummary.frame) #confirm whether edges existed or not. If "durations" is NA, then no edge existed.
edgeFrame<-data.frame(data.table::rbindlist(edgelist), stringsAsFactors = TRUE)
confirmed_edges <- edgeFrame[is.na(edgeFrame$duration) == FALSE,] #So now we have a data frame detailing the undirected edges we observed and the number of contacts associated with them.
if(removeDuplicates == FALSE){ #if the user wants duplicate edges, we bind confirmed_edges to a reflected version of itself
confirmed_edges.reflected <- confirmed_edges
confirmed_edges.reflected[,c(1,2)] <- confirmed_edges.reflected[,c(2,1)] #flip to/from nodes
confirmed_edges <- data.frame(data.table::rbindlist(list(confirmed_edges, confirmed_edges.reflected))) #update confirmed_edges
}
rownames(confirmed_edges)<-seq(1,nrow(confirmed_edges))
return(confirmed_edges)
}
}
return(confirmed_edges.list)
}else{ #if contactSummary is not a list of data frames
contactSummary.node1 <- unique(contactSummary$id)
contactSummary.node2 <- substring((names(contactSummary[grep("contactDuration_", names(contactSummary))])),22) #pulls out the contacted IDs
potential_edges <- expand.grid(contactSummary.node1, contactSummary.node2, stringsAsFactors = TRUE) #create a data frame detailing all the potential edges that may have occurred in the dataset (including loops, but loops will ultimately be removed later).
names(potential_edges) <- c("from", "to")
potential.ntwrk <- igraph::simplify(igraph::graph_from_data_frame(potential_edges, directed = FALSE), remove.multiple = TRUE) #create a simplified network object (to increase efficiency below)
potential_edges <- igraph::as_data_frame(potential.ntwrk) #convert back to data frame
edgelist<-apply(potential_edges,1,confirm_edges.noBlock, y=contactSummary) #confirm whether edges existed or not. If "durations" is NA, then no edge existed.
edgeFrame<-data.frame(data.table::rbindlist(edgelist), stringsAsFactors = TRUE)
confirmed_edges <- edgeFrame[is.na(edgeFrame$duration) == FALSE,] #So now we have a data frame detailing the undirected edges we observed and the number of contacts associated with them.
if(removeDuplicates == FALSE){ #if the user wants duplicate edges, we bind confirmed_edges to a reflected version of itself
confirmed_edges.reflected <- confirmed_edges
confirmed_edges.reflected[,c(1,2)] <- confirmed_edges.reflected[,c(2,1)] #flip to/from nodes
confirmed_edges <- data.frame(data.table::rbindlist(list(confirmed_edges, confirmed_edges.reflected))) #update confirmed_edges
}
rownames(confirmed_edges)<-seq(1,nrow(confirmed_edges))
return(confirmed_edges)
}
}
edgeGenerator.Block<-function(x, removeDuplicates = dupAction, par = parallel, cores = nCores){
block <-NULL #bind this to the global environment to prevent a check flag.
confirm_edges.Block<-function(x,y){ #x = potential edges, y = contactSummary; essentially presents contact summary results in long form
if(length(levels(unname(unlist(x[2])))) > 1){ #This has to be here to avoid an error when trying to coerce output into the out.frame
x2.id <- droplevels(unname(unlist(x[2])))
}else{ #if the number of levels <= 1
x2.id <-unname(unlist(x[2]))
}
if(length(levels(unname(unlist(x[3])))) > 1){ #This has to be here to avoid an error when trying to coerce output into the out.frame
x3.id <- droplevels(unname(unlist(x[3])))
}else{ #if the number of levels <= 1
x3.id <-unname(unlist(x[3]))
}
out.frame<-data.frame(from = x2.id, to = x3.id, stringsAsFactors = TRUE) #must be made into a data frame to avoid the "listCoercing LHS to a list" warning.
y.ContactNames<-c(NA,NA,NA,substring((names(y[grep("contactDuration_", names(y))])),22)) #The three NAs represent the first 3 columns in y, which are irrelevent for our needs.
duration <- unname(unlist(y[which(y$id == x2.id & y$block == unname(unlist(x[1]))), which(y.ContactNames == x3.id)]))
duration.corrected<-ifelse(duration > 0, duration, NA)
out.frame$durations<-duration.corrected #this is the total number of durations individuals were observed in contact with others
out.frame$block <- unname(unlist(x[1])) #add block information
out.frame$block.start <- unname(unlist(as.character(x[4])))
out.frame$block.end <- unname(unlist(as.character(x[5])))
return(out.frame)
}
contactSummary<-summarizeContacts(x, importBlocks = TRUE, parallel = par, nCores = cores) #generate a summary of the contact table using the summarizeContacts function (available in the package as a stand-alone function)
if(is.data.frame(contactSummary) == FALSE & is.list(contactSummary) == TRUE){
if (parallel == TRUE){
cl <- parallel::makeCluster(nCores)
doParallel::registerDoParallel(cl)
on.exit(parallel::stopCluster(cl))
confirmed_edges.list <- foreach::foreach(k = 1:length(contactSummary), .packages = "foreach") %dopar% {
contactSummary.frame <- contactSummary[[k]]
contactSummary.node1 <- unique(contactSummary.frame$id)
contactSummary.node2 <- substring((names(contactSummary.frame[grep("contactDuration_", names(contactSummary.frame))])),22) #pulls out the contacted IDs
block_info<-data.frame(block = unique(contactSummary.frame$block), block.start = unique(contactSummary.frame$block.start), block.end = unique(contactSummary.frame$block.end), stringsAsFactors = TRUE) #pulls the block information from contactSummary
potential_edges1 <- expand.grid(contactSummary.node1, contactSummary.node2, block_info$block, stringsAsFactors = TRUE) #create a data frame detailing all the potential edges that may have occurred in the dataset (including loops, but loops will ultimately be removed later).
names(potential_edges1) <- c("from", "to", "block")
potential_edges2<-merge(potential_edges1, block_info, by = "block") #note that this merge reorders the columns, by placing the "block" column first.
potential_edges2<-potential_edges2[order(as.numeric(as.character(potential_edges2$block))),] #orders rows by ascending block number.
blocks.adjusted<- foreach::foreach(j = unique(potential_edges2$block), .packages = "foreach") %do% { #creates a vector of rows describing duplicated edges.
blockSub<- droplevels(subset(potential_edges2, block == j)) #subset the data by block
potential.ntwrk <- igraph::simplify(igraph::graph_from_data_frame(blockSub[,c(2,3)], directed = FALSE), remove.multiple = TRUE) #create a simplified network object. Note that we exclude block info from this function (because it will return an error if included)
blockSub.edges <- igraph::as_data_frame(potential.ntwrk) #convert back to data frame
blockSub.out <- data.frame(block = unique(blockSub$block), from = blockSub.edges$from, to = blockSub.edges$to, block.start = unique(blockSub$block.start), block.end = unique(blockSub$block.end)) #append block info to the simplified data frame & ensure that columns are arranged as they were in potentialEdges2
return(blockSub.out) #return the blockSub.out data frame
}
potential_edges2 <- data.frame(data.table::rbindlist(blocks.adjusted)) #bind the adjusted block sets together to redefine potential_edges2
edgelist<-apply(potential_edges2,1,confirm_edges.Block, y=contactSummary.frame) #confirm whether edges existed or not. If "durations" is NA, then no edge existed.
edgeFrame<-data.frame(data.table::rbindlist(edgelist), stringsAsFactors = TRUE)
confirmed_edges <- edgeFrame[is.na(edgeFrame$duration) == FALSE,] #So now we have a data frame detailing the undirected edges we observed and the number of contacts associated with them.
if(removeDuplicates == FALSE){ #if the user wants duplicate edges, we bind confirmed_edges to a reflected version of itself
confirmed_edges.reflected <- confirmed_edges
confirmed_edges.reflected[,c(1,2)] <- confirmed_edges.reflected[,c(2,1)] #flip to/from nodes
confirmed_edges <- data.frame(data.table::rbindlist(list(confirmed_edges, confirmed_edges.reflected))) #update confirmed_edges
}
rownames(confirmed_edges)<-seq(1,nrow(confirmed_edges))
return(confirmed_edges)
}
}else{ #if parallel == FALSE
confirmed_edges.list <- foreach::foreach(k = 1:length(contactSummary), .packages = "foreach") %do% {
contactSummary.frame <- contactSummary[[k]]
contactSummary.node1 <- unique(contactSummary.frame$id)
contactSummary.node2 <- substring((names(contactSummary.frame[grep("contactDuration_", names(contactSummary.frame))])),22) #pulls out the contacted IDs
block_info<-data.frame(block = unique(contactSummary.frame$block), block.start = unique(contactSummary.frame$block.start), block.end = unique(contactSummary.frame$block.end), stringsAsFactors = TRUE) #pulls the block information from contactSummary
potential_edges1 <- expand.grid(contactSummary.node1, contactSummary.node2, block_info$block, stringsAsFactors = TRUE) #create a data frame detailing all the potential edges that may have occurred in the dataset (including loops, but loops will ultimately be removed later).
names(potential_edges1) <- c("from", "to", "block")
potential_edges2<-merge(potential_edges1, block_info, by = "block") #note that this merge reorders the columns, by placing the "block" column first.
potential_edges2<-potential_edges2[order(as.numeric(as.character(potential_edges2$block))),] #orders rows by ascending block number.
blocks.adjusted<- foreach::foreach(j = unique(potential_edges2$block), .packages = "foreach") %do% { #creates a vector of rows describing duplicated edges.
blockSub<- droplevels(subset(potential_edges2, block == j)) #subset the data by block
potential.ntwrk <- igraph::simplify(igraph::graph_from_data_frame(blockSub[,c(2,3)], directed = FALSE), remove.multiple = TRUE) #create a simplified network object. Note that we exclude block info from this function (because it will return an error if included)
blockSub.edges <- igraph::as_data_frame(potential.ntwrk) #convert back to data frame
blockSub.out <- data.frame(block = unique(blockSub$block), from = blockSub.edges$from, to = blockSub.edges$to, block.start = unique(blockSub$block.start), block.end = unique(blockSub$block.end)) #append block info to the simplified data frame & ensure that columns are arranged as they were in potentialEdges2
return(blockSub.out) #return the blockSub.out data frame
}
potential_edges2 <- data.frame(data.table::rbindlist(blocks.adjusted)) #bind the adjusted block sets together to redefine potential_edges2
edgelist<-apply(potential_edges2,1,confirm_edges.Block, y=contactSummary.frame) #confirm whether edges existed or not. If "durations" is NA, then no edge existed.
edgeFrame<-data.frame(data.table::rbindlist(edgelist), stringsAsFactors = TRUE)
confirmed_edges <- edgeFrame[is.na(edgeFrame$duration) == FALSE,] #So now we have a data frame detailing the undirected edges we observed and the number of contacts associated with them.
if(removeDuplicates == FALSE){ #if the user wants duplicate edges, we bind confirmed_edges to a reflected version of itself
confirmed_edges.reflected <- confirmed_edges
confirmed_edges.reflected[,c(1,2)] <- confirmed_edges.reflected[,c(2,1)] #flip to/from nodes
confirmed_edges <- data.frame(data.table::rbindlist(list(confirmed_edges, confirmed_edges.reflected))) #update confirmed_edges
}
rownames(confirmed_edges)<-seq(1,nrow(confirmed_edges))
return(confirmed_edges)
}
}
return(confirmed_edges.list)
}else{ #if contactSummary is NOT a list of data frames
contactSummary.node1 <- unique(contactSummary$id)
contactSummary.node2 <- substring((names(contactSummary[grep("contactDuration_", names(contactSummary))])),22) #pulls out the contacted IDs
block_info<-data.frame(block = unique(contactSummary$block), block.start = unique(contactSummary$block.start), block.end = unique(contactSummary$block.end), stringsAsFactors = TRUE) #pulls the block information from contactSummary
potential_edges1 <- expand.grid(contactSummary.node1, contactSummary.node2, block_info$block, stringsAsFactors = TRUE) #create a data frame detailing all the potential edges that may have occurred in the dataset (including loops, but loops will ultimately be removed later).
names(potential_edges1) <- c("from", "to", "block")
potential_edges2<-merge(potential_edges1, block_info, by = "block") #note that this merge reorders the columns, by placing the "block" column first.
potential_edges2<-potential_edges2[order(as.numeric(as.character(potential_edges2$block))),] #orders rows by ascending block number.
blocks.adjusted<- foreach::foreach(j = unique(potential_edges2$block), .packages = "foreach") %do% { #For efficiency remove duplicated potential edges
blockSub<- droplevels(subset(potential_edges2, block == j)) #subset the data by block
potential.ntwrk <- igraph::simplify(igraph::graph_from_data_frame(blockSub[,c(2,3)], directed = FALSE), remove.multiple = TRUE) #create a simplified network object. Note that we exclude block info from this function (because it will return an error if included)
blockSub.edges <- igraph::as_data_frame(potential.ntwrk) #convert back to data frame
blockSub.out <- data.frame(block = unique(blockSub$block), from = blockSub.edges$from, to = blockSub.edges$to, block.start = unique(blockSub$block.start), block.end = unique(blockSub$block.end)) #append block info to the simplified data frame & ensure that columns are arranged as they were in potentialEdges2
return(blockSub.out) #return the blockSub.out data frame
}
potential_edges2 <- data.frame(data.table::rbindlist(blocks.adjusted)) #bind the adjusted block sets together to redefine potential_edges2
edgelist<-apply(potential_edges2,1,confirm_edges.Block, y=contactSummary) #confirm whether edges existed or not. If "durations" is NA, then no edge existed.
edgeFrame<-data.frame(data.table::rbindlist(edgelist), stringsAsFactors = TRUE)
confirmed_edges <- edgeFrame[is.na(edgeFrame$duration) == FALSE,] #So now we have a data frame detailing the undirected edges we observed and the number of contacts associated with them.
if(removeDuplicates == FALSE){ #if the user wants duplicate edges, we bind confirmed_edges to a reflected version of itself
confirmed_edges.reflected <- confirmed_edges
confirmed_edges.reflected[,c(1,2)] <- confirmed_edges.reflected[,c(2,1)] #flip to/from nodes
confirmed_edges <- data.frame(data.table::rbindlist(list(confirmed_edges, confirmed_edges.reflected))) #update confirmed_edges
}
rownames(confirmed_edges)<-seq(1,nrow(confirmed_edges))
return(confirmed_edges)
}
}
if(importBlocks == FALSE){
edgeSet<-edgeGenerator.noBlock(x)
}else{ #i.e., if importBlocks == TRUE
edgeSet<-edgeGenerator.Block(x)
}
return(edgeSet)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.