Nothing
#' Summarize Contact Events
#'
#' This function takes the output from contactDur.all or contactDur.area and
#' reports the number of durations when tracked individuals are in "contact"
#' with one another (contactDur.all) or with specified fixed points/polygons
#' (contactDur.area).
#'
#' If x is a list, and avg == TRUE, this function will produce an extra data
#' frame containing the mean column values for each id (per block if
#' importBlocks == TRUE).
#'
#' This is a sub-function found within the contactTest and ntwrkEdges function.
#' @param x Output from the contactDur.all or contactDur.area functions. Can
#' be either a data frame or list of data frames.
#' @param importBlocks Logical. If true, each block in x will be analyzed
#' separately. Defaults to FALSE. Note that the "block" column must exist
#' in x.
#' @param avg Logical. If TRUE, summary output from all data frames contained
#' in x will be averaged together. Output will produce an extra data frame
#' containing the mean column values for each id (per block if
#' importBlocks == TRUE). Defaults to FALSE.
#' @param parallel Logical. If TRUE, sub-functions within the summarizeContacts
#' wrapper will be parallelized. Note that the only sub-function
#' parallelized here is called ONLY when importBlocks == TRUE.
#' @param nCores Integer. Describes the number of cores to be dedicated to
#' parallel processes. Defaults to half of the maximum number of cores
#' available (i.e., (parallel::detectCores()/2)).
#' @keywords data-processing contact sub-function
#' @return Returns a data frame (or list of data frames if \code{x} is a
#' list of data frames) with the following columns:
#'
#' \item{id}{The unique ID of a tracked individual for which we will
#' summarize to all other individuals/fixed locations observed in \code{x}.}
#' \item{id}{Sum number of individuals/fixed locations observed in contact
#' specific individuals.}
#' \item{id}{Sum number of contacts associated with specific individuals.}
#' \item{contactDuration_...}{Number of contacts between specific dyads.}
#'
#' If importBlocks == TRUE, the following columns are appended to the output
#' data frame described above:
#'
#' \item{block}{Integer ID describing unique blocks of time during which
#' contacts occur.}
#' \item{block.start}{The timepoint in \code{x} at which the \code{block}
#' begins.}
#' \item{block.end}{The timepoint in \code{x} at which the \code{block}
#' ends.}
#' \item{numBlocks}{Integer describing the total number of time blocks
#' observed within \code{x} at which the \code{block}}
#' @import foreach
#' @export
#' @examples
#' data(calves)
#'
#' calves.dateTime<-datetime.append(calves, date = calves$date,
#' time = calves$time) #create a dataframe with dateTime identifiers for location fixes
#'
#' calves.agg<-tempAggregate(calves.dateTime, id = calves.dateTime$calftag,
#' dateTime = calves.dateTime$dateTime, point.x = calves.dateTime$x,
#' point.y = calves.dateTime$y, secondAgg = 300, extrapolate.left = FALSE,
#' extrapolate.right = FALSE, resolutionLevel = "reduced", parallel = FALSE,
#' na.rm = TRUE, smooth.type = 1) #smooth to 5-min fix intervals.
#'
#' calves.dist<-dist2All_df(x = calves.agg, parallel = FALSE,
#' dataType = "Point", lonlat = FALSE)
#' calves.contact.block<-contactDur.all(x = calves.dist, dist.threshold=1,
#' sec.threshold=10, blocking = TRUE, blockUnit = "hours", blockLength = 1,
#' equidistant.time = FALSE, parallel = FALSE, reportParameters = TRUE)
#'
#' calves.contactSumm.NOblock <- summarizeContacts(calves.contact.block)
#' head(calves.contactSumm.NOblock)
#'
#' calves.contactSumm.block <- summarizeContacts(calves.contact.block,
#' importBlocks = TRUE)
#' head(calves.contactSumm.block)
summarizeContacts<- function(x, importBlocks = FALSE, avg = FALSE, parallel = FALSE, nCores = (parallel::detectCores()/2)){
#bind the following variables to the global environment so that the CRAN check doesn't flag them as potential problems
i <- NULL
#write sub-functions
summaryAgg.block<-function(x,y){ #calculates the mean potential contacts by id and block. Using this apply function is faster than simply aggregating the data set by id and block
sumTable<-y[which(y$block == unname(unlist(x[1]))),]
if(nrow(sumTable) == 0){output <- NULL #if there's nothing in the subset, the function will not proceed any further.
}else{
blockStart<- unique(lubridate::as_datetime(sumTable$block.start)) #added 02/05/2019 - had to keep track of this new information ; updated 06/02/2019 - converted the factor data to POSIXct format in order to avoid a "length is too large for hashing" error.
blockEnd<- unique(lubridate::as_datetime(sumTable$block.end)) #added 02/05/2019 - had to keep track of this new information ; updated 06/02/2019 - converted the factor data to POSIXct format in order to avoid a "length is too large for hashing" error.
blockNum<- unique(sumTable$numBlocks)
sumTable.redac<-sumTable[,-c(match("id", names(sumTable)), match("block", names(sumTable)), match("block.start", names(sumTable)), match("block.end", names(sumTable)), match("numBlocks", names(sumTable)))] #Remove the columns that cannot/shoud not be averaged.
output<-stats::aggregate(sumTable.redac, list(id = sumTable$id), mean) #this not only calculates the mean of each column by id, but also adds the "id" column back into the data set.
output$block = unname(unlist(x[1])) #add this information back into the table
output$block.start = blockStart #add this information back into the table
output$block.end = blockEnd #add this information back into the table
output$numBlocks = blockNum #add this information back into the table
}
return(output)
}
summary.generator<-function(x, importBlocks, parallel, nCores){
blockSum <-function(x,y, indivSeq, areaSeq){
blockDurFrame<-y[which(y$block == unname(unlist(x[1]))),]
indivSeqFrame <- data.frame(indivSeq, stringsAsFactors = TRUE)
summary.contacts<-apply(indivSeqFrame, 1, contSum, blockDurFrame, indivSeq, areaSeq)
indivSum.full<- data.frame(data.table::rbindlist(summary.contacts), stringsAsFactors = TRUE)
indivSum.full$block <- unname(unlist(x[1]))
#added 02/05/2019 - to maintain this new information created in the newest version of the contactDur functions.
indivSum.full$block.start <- unique(lubridate::as_datetime(blockDurFrame$block.start)) # updated 06/02/2019 - converted the factor data to POSIXct format in order to avoid a "length is too large for hashing" error.
indivSum.full$block.end <- unique(lubridate::as_datetime(blockDurFrame$block.end)) # updated 06/02/2019 - converted the factor data to POSIXct format in order to avoid a "length is too large for hashing" error.
indivSum.full$numBlocks <- unique(blockDurFrame$numBlocks)
return(indivSum.full)
}
contSum <-function(x,y, indivSeq, areaSeq){
me = (unname(unlist(x[1])))
if(length(y$dyadMember1) > 0){ #This essentially determines if the input was created with dist.all or distToArea. If length(dyadMember1) >0, it was created with dist.all
indivContact1 <- y[c(which(as.character(y$dyadMember1) == me)),] #had to make this as.character b/c "me" is a factor with levels that may be different than y$dyadMember1
indivContact2 <- y[c(which(as.character(y$dyadMember2) == me)),] #had to make this as.character b/c "me" is a factor with levels that may be different than y$dyadMember2
}else{
indivContact1 <- y[c(which(as.character(y$indiv.id) == me)),]
indivContact2 <- matrix(nrow=0,ncol=0)
}
#Here identify the number of contact durations individuals had with others. How we do this is determined by the number of times individuals appear in y's dyadMember1 and dyadMember2 columns. Note that the only option if the function input originated from contactDur.area is (nrow(indivContact1) >= 1) & (nrow(indivContact2) == 0)
if((nrow(indivContact1) >= 1) & (nrow(indivContact2) >= 1)){
indivContact.full <- data.frame(data.table::rbindlist(list(indivContact1,indivContact2)), stringsAsFactors = TRUE)
specIndivSeq = unique(c(as.character(indivContact.full$dyadMember1),as.character(indivContact.full$dyadMember2))) #had to add as.character call because contactDur functions now produce factor data. 02/05/2019
specIndivSeq1 = specIndivSeq[-which(specIndivSeq == me)]
}
if((nrow(indivContact1) >= 1) & (nrow(indivContact2) == 0)){
indivContact.full <- indivContact1
if(length(y$dyadMember1) > 0){ #This essentially determines if the input was created with dist.all or distToArea. If length(dyadMember1) >0, it was created with dist.all
specIndivSeq = unique(c(as.character(indivContact.full$dyadMember1),as.character(indivContact.full$dyadMember2))) #had to add as.character call because contactDur functions now produce factor data. 02/05/2019
specIndivSeq1 = specIndivSeq[-which(specIndivSeq == me)]
}else{
specIndivSeq1 = unique(as.character(indivContact.full$area.id)) #had to add as.character call because contactDur functions now produce factor data. 02/05/2019
}
}
if((nrow(indivContact2) >= 1) & (nrow(indivContact1) == 0)){
indivContact.full <- indivContact2
specIndivSeq = unique(c(as.character(indivContact.full$dyadMember1),as.character(indivContact.full$dyadMember2))) #had to add as.character call because contactDur functions now produce factor data. 02/05/2019
specIndivSeq1 = specIndivSeq[-which(specIndivSeq == me)]
}
if((nrow(indivContact2) == 0) & (nrow(indivContact1) == 0)){
indivContact.full <- indivContact1 #if neither indivContact1 or indivContact2 have any rows, indivContact.full won't have any rows either.
specIndivSeq1 = 0
}
if(length(y$dyadMember1) > 0){ #This essentially determines if the input was created with dist.all or distToArea. If length(dyadMember1) >0, it was created with dist.all
if(nrow(indivContact.full) > 1){
indivSeqFrame1 <-data.frame(indivSeq, stringsAsFactors = TRUE)
contactSum<-apply(indivSeqFrame1, 1, distributeContacts1, indivContact.full, me)
sumTable <- data.frame(matrix(ncol = (3+length(indivSeq)), nrow = 1), stringsAsFactors = TRUE)
colnames(sumTable) <- c("id","totalDegree","totalContactDurations", paste("contactDuration_Indiv",indivSeq, sep = ""))
sumTable$id = me
sumTable$totalDegree <- length(specIndivSeq1)
sumTable$totalContactDurations = sum(indivContact.full$contactDuration)
sumTable[1,4:ncol(sumTable)] <- contactSum
sumTable[,match(paste("contactDuration_Indiv",me, sep = ""), names(sumTable))] = NA
}else{ #if nrow !>0
if(nrow(indivContact.full) == 1){
sumTable <- data.frame(matrix(ncol = (3+length(indivSeq)), nrow = 1), stringsAsFactors = TRUE)
colnames(sumTable) <- c("id","totalDegree","totalContactDurations", paste("contactDuration_Indiv",indivSeq, sep = ""))
sumTable$id = me
sumTable$totalDegree <- 1
sumTable$totalContactDurations = indivContact.full$contactDuration
sumTable[1,4:ncol(sumTable)] <- 0
sumTable[,match(paste("contactDuration_Indiv",specIndivSeq1, sep = ""), names(sumTable))] = indivContact.full$contactDuration
sumTable[,match(paste("contactDuration_Indiv",me, sep = ""), names(sumTable))] = NA
}
if(nrow(indivContact.full) == 0){
sumTable <- data.frame(matrix(ncol = (3+length(indivSeq)), nrow = 1), stringsAsFactors = TRUE)
colnames(sumTable) <- c("id","totalDegree","totalContactDurations", paste("contactDuration_Indiv",indivSeq, sep = ""))
sumTable$id = me
sumTable[1,2:ncol(sumTable)] <- 0
sumTable[,match(paste("contactDuration_Indiv",me, sep = ""), names(sumTable))] = NA
}
}
}else{ # length(y$dyadMember1) == 0
if(nrow(indivContact.full) > 1){
areaSeqFrame <- data.frame(areaSeq, stringsAsFactors = TRUE)
contactSum<-apply(areaSeqFrame, 1, distributeContacts2, indivContact.full)
sumTable <- data.frame(matrix(ncol = (3+length(areaSeq)), nrow = 1), stringsAsFactors = TRUE)
colnames(sumTable) <- c("id","totalDegree","totalContactDurations", paste("contactDuration_Area_",areaSeq, sep = ""))
sumTable$id = me
sumTable$totalDegree <- length(specIndivSeq1)
sumTable$totalContactDurations = sum(indivContact.full$contactDuration)
sumTable[1,4:ncol(sumTable)] <- contactSum
}else{ #if nrow !>1
if(nrow(indivContact.full) == 1){
areaVec <- unique(y$area.id)
sumTable <- data.frame(matrix(ncol = (3+length(areaSeq)), nrow = 1), stringsAsFactors = TRUE)
colnames(sumTable) <- c("id","totalDegree","totalContactDurations", paste("contactDuration_Area_",areaSeq, sep = ""))
sumTable$id = me
sumTable$totalDegree <- 1
sumTable$totalContactDurations = indivContact.full$contactDuration
sumTable[1,4:ncol(sumTable)] <- 0
sumTable[,match(paste("contactDuration_Area_",areaVec, sep = ""), names(sumTable))] = indivContact.full$contactDuration
}
if(nrow(indivContact.full) == 0){
sumTable <- data.frame(matrix(ncol = (3+length(areaSeq)), nrow = 1), stringsAsFactors = TRUE)
colnames(sumTable) <- c("id","totalDegree","totalContactDurations", paste("contactDuration_Area_",areaSeq, sep = ""))
sumTable$id = me
sumTable[1,2:ncol(sumTable)] <- 0
}
}
}
return(sumTable)
}
distributeContacts1<- function(x,y, me){
if(unname(unlist(x[1])) == me){
spec.durations = 0
}else{
contact1 <- y[c(which(as.character(y$dyadMember1) == unname(unlist(x[1])))),]
contact2 <- y[c(which(as.character(y$dyadMember2) == unname(unlist(x[1])))),]
if((nrow(contact1) >= 1) & (nrow(contact2) >= 1)){
contact.full <- data.frame(data.table::rbindlist(list(contact1,contact2)), stringsAsFactors = TRUE)
}
if((nrow(contact1) >= 1) & (nrow(contact2) == 0)){
contact.full <- contact1
}
if((nrow(contact2) >= 1) & (nrow(contact1) == 0)){
contact.full <- contact2
}
if((nrow(contact2) == 0) & (nrow(contact1) == 0)){
contact.full <- contact1 #if neither contact1 or contact2 have any rows, contact.full won't have any rows either.
}
spec.durations <- ifelse(nrow(contact.full) >= 1, sum(contact.full$contactDuration),0)
}
return(spec.durations)
}
distributeContacts2<- function(x,y){
contact.full <- y[c(which(y$area.id == unname(unlist(x[1])))),]
spec.durations <- ifelse(nrow(contact.full) >= 1, sum(contact.full$contactDuration),0)
return(spec.durations)
}
if(importBlocks == TRUE){
if(length(x$dyadMember1) > 0){ #This essentially determines if the input was created with dist.all or distToArea. If length(dyadMember1) >0, it was created with dist.all
x<-x[order(x$block,x$dyadMember1,x$dyadMember2),]
indivVec <- c(as.character(x[,match("dyadMember1", names(x))]), as.character(x[,match("dyadMember2", names(x))]))
areaSeq = NULL
}else{
x<-x[order(x$block,x$indiv.id),]
indivVec <- x[,match("indiv.id", names(x))]
areaVec <- x[,match("area.id", names(x))]
areaVec <- areaVec[order(areaVec)] #forces the data type to become character so that there will be no issues with apply functions later.
areaSeq<-as.character(unique(areaVec))
}
indivSeq <- unique(indivVec)
indivSeq<-indivSeq[order(indivSeq)]
indivSeq<-as.character(indivSeq) #forces the data type to become character so that there will be no issues with apply functions later.
if(parallel == TRUE){
cl <- parallel::makeCluster(nCores)
doParallel::registerDoParallel(cl)
on.exit(parallel::stopCluster(cl))
summary.block<- foreach::foreach(i = unique(as.character(x$block))) %dopar% blockSum(i, x, indivSeq, areaSeq)
}else{ #if parallel == FALSE
blockVecFrame <- data.frame(unique(as.character(x$block)), stringsAsFactors = TRUE)
summary.block <- apply(blockVecFrame, 1, blockSum, x, indivSeq, areaSeq) #according to Dan, this apply function is faster than parApply, so I've removed the parApply option 1/17
}
summaryTable<- data.frame(data.table::rbindlist(summary.block), stringsAsFactors = TRUE)
summaryTable<-summaryTable[order(as.numeric(as.character(summaryTable$block)),summaryTable$id),]
}else{ #importBlocks == FALSE
if(length(x$dyadMember1) > 0){ #This essentially determines if the input was created with dist.all or distToArea. If length(dyadMember1) >0, it was created with dist.all
x<-x[order(x$dyadMember1,x$dyadMember2),]
indivVec <- c(as.character(x[,match("dyadMember1", names(x))]), as.character(x[,match("dyadMember2", names(x))])) #as.character forces the data type to become character so that there will be no issues with apply functions later.
areaSeq = NULL
}else{
x<-x[order(x$indiv.id),]
indivVec <- x[,match("indiv.id", names(x))]
areaVec <- x[,match("area.id", names(x))]
areaVec <- areaVec[order(areaVec)] #forces the data type to become character so that there will be no issues with apply functions later.
areaSeq<-as.character(unique(areaVec))
}
indivSeq <- unique(indivVec)
indivSeq<-indivSeq[order(indivSeq)]
indivSeq<-as.character(indivSeq) #forces the data type to become character so that there will be no issues with apply functions later.
indivSeqFrame <- data.frame(indivSeq, stringsAsFactors = TRUE)
summary.contacts <- apply(indivSeqFrame, 1, contSum, x, indivSeq, areaSeq) #according to Dan, this apply function is faster than parApply, so I've removed the parApply option 1/17
summaryTable<- data.frame(data.table::rbindlist(summary.contacts), stringsAsFactors = TRUE)
summaryTable<-summaryTable[order(summaryTable$id),]
}
return(summaryTable)
}
if(is.data.frame(x) == FALSE & is.list(x) == TRUE){
summaryList<-lapply(x, summary.generator, importBlocks, parallel, nCores) #changed to lapply 02/02/2019
if(avg == TRUE){
full.summary<- data.frame(data.table::rbindlist(summaryList, fill = TRUE), stringsAsFactors = TRUE) #Now we need to average the number of contacts by id and block
idSeq<-unique(full.summary$id)
if(importBlocks == TRUE){
blockSeq<-unique(full.summary$block)
sumTab <- apply(data.frame(blockSeq, stringsAsFactors = TRUE), 1, summaryAgg.block, y = full.summary)
sumTab.agg <- data.frame(data.table::rbindlist(sumTab), stringsAsFactors = TRUE)
}else{ #if importBlocks == FALSE
sumTab.agg<-stats::aggregate(full.summary[,-match("id", colnames(full.summary))], list(id = full.summary$id), mean) #this not only calculates the mean of each column by id, but also adds the "id" column back into the data set.
}
summary.output<-list(sumTab.agg, summaryList)
names(summary.output)<-c("avg.","contactSummaries.")
}else{ #if avg == FALSE
summary.output<- summaryList
}
}else{ #if x is NOT a list
summary.output <- summary.generator(x, importBlocks, parallel, nCores)
}
return(summary.output)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.