# R/potentialDurations.R In contact: Creating Contact and Social Networks

#### Documented in potentialDurations

#' Identify Potential Contact Durations
#'
#' This function uses the output from dist2... functions to determine the
#'    potential maximum number of direct-contact durations between
#'    individuals in a data set. The max number of durations potentially
#'    observed is the number of TSWs both individuals (or an individual and
#'    fixed area) were simulataneously observed at the same time over the
#'    study period/temporal block.
#'
#'  Please note that this function assumes the desired minimum contact
#'     duration (MCD), as defined by Dawson et al. (2019), is 1 (i.e., a
#'     "contact" occurs when individuals are within a specified distance
#'     threshold for a single timestep). In a future version of this
#'     function we will aim to increase flexability by allowing for variable
#'     MCD values. For further clarification on the MCD definition and various
#'     contact-determination assumptions, please see:
#'
#'    Dawson, D.E., Farthing, T.S., Sanderson, M.W., and Lanzas, C.
#'    2019. Transmission on empirical dynamic contact networks is influenced by
#'    data processing decisions. Epidemics 26:32-42.
#'    https://doi.org/10.1016/j.epidem.2018.08.003/
#'
#' @param x Output from the dist2All or dist2Area function. Can be either a
#'     data frame or non-data-frame list.
#' @param blocking Logical. If TRUE, contacts will be evaluated for temporal
#'     blocks spanning blockLength blockUnit (e.g., 6 hours) within the data
#'     set. Defaults to FALSE.
#' @param blockLength Integer. Describes the number blockUnits within each
#'     temporal block. Defaults to 1.
#' @param blockUnit Character string taking the values: "secs," "mins,"
#'     "hours," "days," or "weeks." Describes the temporal unit associated with
#'     each block. Defaults to "hours."
#' @param blockingStartTime Character string or date object describing the date
#'     OR dateTime starting point of the first time block. For example, if
#'     blockingStartTime = "2016-05-01" OR "2016-05-01 00:00:00", the first
#'     timeblock would begin at "2016-05-01 00:00:00." If NULL, the
#'     blockingStartTime defaults to the minimum dateTime point in x. Note:
#'     any blockingStartTime MUST precede or be equivalent to the minimum
#'     timepoint in x. Additional note: If blockingStartTime is a character
#'     string, it must be in the format ymd OR ymd hms.
#' @param distFunction Character string taking the values: "dist2All_df",
#'     or "dist2Area_df." Describes the contact-package function used to
#'     generate x.
#'
#' @keywords data-processing contact
#' @return Returns a data frame (or list of data frames if \code{x} is a
#'    list of data frames) with the following columns:
#'
#'    \item{id}{The unique ID of an individual observed in the data set.}
#'    \item{potenDegree}{The maximum degree possible for individual \code{id}
#'    based on the number of other individuals observed during the time
#'    period.}
#'    \item{potenTotalContactDurations}{The maximum number of contact durations
#'    individual \code{id} may experience during the time period.}
#'    \item{potenContactDurations_...}{The maximum number of contact durations
#'    individual \code{id} may experience with each specific individual/fixed
#'    area during the time period.}
#'
#'    If blocking == TRUE, the following columns are appended to the output
#'    data frame described above:
#'
#'    \item{block}{Integer ID describing unique blocks of time during which
#'    contacts may occur.}
#'    \item{block.start}{The timepoint in \code{x} at which the \code{block}
#'    begins.}
#'    \item{block.end}{The timepoint in \code{x} at which the \code{block}
#'    ends.}
#' @import foreach
#' @export
#' @examples
#'
#' data(calves)
#'
#' calves.dateTime<-datetime.append(calves, date = calves$date, time = #' calves$time) #create a dataframe with dateTime identifiers for location foxes
#'
#' calves.agg<-tempAggregate(calves.dateTime, id = calves.dateTime$calftag, #' dateTime = calves.dateTime$dateTime, point.x = calves.dateTime$x, #' point.y = calves.dateTime$y, secondAgg = 300, extrapolate.left = FALSE,
#'     extrapolate.right = FALSE, resolutionLevel = "reduced", parallel = FALSE,
#'     na.rm = TRUE, smooth.type = 1) #smooth locations to 5-min fix intervals.
#'
#' calves.dist<-dist2All_df(x = calves.agg, parallel = FALSE, dataType = "Point",
#'     lonlat = FALSE) #calculate distance between all individuals at each timepoint
#'
#' calves.potentialContacts<-potentialDurations(x = calves.dist, blocking = FALSE)

potentialDurations<-function(x, blocking = FALSE, blockLength = 1, blockUnit = "hours", blockingStartTime = NULL, distFunction = "dist2All_df"){

#bind the following variables to the global environment so that the CRAN check doesn't flag them as potential problems
listBreak <- NULL
breakBlock <- NULL
j <- NULL
l <- NULL
m <- NULL

thisEnvironment<-environment() #tag the environment of the parent function so that sub-functions may work within it.

if(is.data.frame(x) == FALSE & is.list(x) == TRUE){ #if the x input is a list of data frames

y<-x #rename x to prevent function confusion later on
rm(x) #remove x to restore memory

listBreakFrame<-data.frame(seq(1:length(y)), stringsAsFactors = TRUE)

listBreak.function <- function(m, environmentTag = thisEnvironment){
#subEnvir1 <- environment() #tag the environment of the apply function
assign("listBreak", m, envir = environmentTag) #assign a "listBreak" object in the parent environment that takes the same value as x in apply function

eval(expr = {

x<- y[[listBreak]] #break the original input into a single data frame
y[[listBreak]] <- NULL #replace data frame of interest in the original input with NULL to free up memory

if(blocking == TRUE){

#the code immediately below comes from the contact::timeBlock.append function. We just refrain from calling it here to prevent the inputs being cloned within the function and because we don't need ALL the info that particular function would append to x. Thus, we save time and memory.

if(blockUnit == "Secs" || blockUnit == "SECS" || blockUnit == "secs"){
blockLength1 <- blockLength
}
if(blockUnit == "Mins" || blockUnit == "MINS" || blockUnit == "mins"){
blockLength1 <- blockLength*60 #num seconds in a minute
}
if(blockUnit == "Hours" || blockUnit == "HOURS" || blockUnit == "hours"){
blockLength1 <- blockLength*60*60 #num seconds in an hour
}
if(blockUnit == "Days" || blockUnit == "DAYS" || blockUnit == "days"){
blockLength1 <- blockLength*60*60*24 #num seconds in a day
}
if(blockUnit == "Weeks" || blockUnit == "WEEKS" || blockUnit == "weeks"){
blockLength1 <- blockLength*60*60*24*7 #num seconds in a week
}

lub.hours = lubridate::hour(x$dateTime) lub.dates = lubridate::date(x$dateTime)

daySecondList = lub.hours * 3600 + lubridate::minute(x$dateTime) * 60 + lubridate::second(x$dateTime) #This calculates a day-second
x<-x[order(lub.dates, daySecondList),] #in case this wasn't already done, we order by date and second. Note that we must order it in this round-about way (using the date and daySecond vectors) to prevent ordering errors that sometimes occurs with dateTime data

if(length(blockingStartTime) == 1){ #if the blockingStartTime argument is defined, we calculate how far it is away (in seconds) from the minimum timepoint in x

blockTimeAdjustment <- difftime(x$dateTime[1], blockingStartTime, units = c("secs")) }else{ #if the blockingStartTime argument is NOT defined, the adjustment is 0 blockTimeAdjustment <- 0 } #for some odd reason, difftime will output mostly zeroes (incorrectly) if there are > 1 correct 0 at the beginning. We use a crude fix here to address this. Basically, we create the zeroes first and combine it with other values afterwards totSecond <- rep(0, length(which(x$dateTime == x$dateTime[1]))) if(nrow(x) > length(totSecond)){ totSecond2<-as.integer(difftime(x$dateTime[(length(totSecond) +1): nrow(x)] ,x$dateTime[1], units = c("secs"))) }else{ totSecond2 <- NULL } studySecond <- as.integer((c(totSecond, totSecond2) -min(c(totSecond, totSecond2))) + 1) + blockTimeAdjustment numblocks <- as.integer(ceiling(max(studySecond)/blockLength1)) block<- ceiling(studySecond/blockLength1) block.start<-as.character((as.POSIXct(x$dateTime[1]) - blockTimeAdjustment) + ((block - 1)*blockLength1)) #identify the timepoint where each block starts (down to the second resolution)
block.end<-as.character((as.POSIXct(x$dateTime[1]) - blockTimeAdjustment) + ((block - 1)*blockLength1) + (blockLength1 -1)) #identify the timepoint where each block ends (down to the second resolution) x$block <- block
x$block.start <- block.start x$block.end <- block.end

rm(list = c("daySecondList", "block.start","block.end", "lub.dates", "lub.hours", "numblocks", "totSecond", "totSecond2", "studySecond", "block")) #remove these objects because they are no longer needed.

#now that we've assigined the block information, we can begin processing the data in earnest

#The first thing we need to do is remove any NAs in the data set (it's unlikely)

idSeq<- unique(x$id) blockSeq <- unique(x$block)
dist.colnames <-substring(colnames(x)[grep("dist.to.", colnames(x))],9) #Note specific id in "dist.to." colnames start at character 9. This will be different depending on if dist2all or dist2area was used
dist.colnames_modified <- gsub("indiv_","",dist.colnames) #remove "indiv_" from dist2all output

blockBreak.function <-function(l, environmentTag = thisEnvironment, columnNames = dist.colnames_modified){

#browser()
#Because the input data files can be quite large, we do not want to clone it excessively by running subfunctions. So, we relate the following expression to the previously-generated environment (where the data already exist)
assign("breakBlock", unname(unlist(l)), envir = environmentTag)

eval(expr = {
blockSub <- droplevels(x[which(x$block == breakBlock),]) #subset the data set by block fe1 <- foreach::foreach(j = idSeq, .noexport = c("idSub", "outputMat")) %do% { idSub <- droplevels(blockSub[which(blockSub$id == j),]) #subset dist.input to only contain the id-value of interest
outputMat<- matrix(nrow = 1, ncol = (3 + length(grep("dist.to.", colnames(blockSub))))) #set up the empty matrix to hold the output data.
colnames(outputMat)<- c("id", "potenDegree", "potenTotalContactDurations", paste("potenContactDurations_", columnNames, sep =""))

#if the individual was not present (i.e., nrow(idSub) == 0), then 0s well be put in the matrix. However, how many potential nodes present in the block takes a bit more effort to calculate because the distance input object may have come from dist2All_df or dist2Area_df.

potentialDegreeIdentifier <-NULL #create an empty vector to describe when nodes are present in the data set.

for(i in 1:length(grep("dist.to.", colnames(blockSub)))){ #loops through the potentially-contactable nodes.

presenceTest<- is.na(blockSub[,grep("dist.to.", colnames(blockSub))[i]]) #if nodes were not present all entries will be TRUE.

if(length(which(presenceTest == FALSE)) > 0){ #identify when the reported value is FALSE (i.e., when nodes WERE present)

potentialDegreeIdentifier <-c(potentialDegreeIdentifier, 1) #if nodes are present add 1 value to potentialDegreeIdentifier

}else{next}

}
if(distFunction == "dist2All_df"){ #if the dist2All_df function was used to generate x
potentialDegree <- ifelse(nrow(idSub) > 0, (length(potentialDegreeIdentifier) - 1), 0) #if the individual WAS present the maximum potential degree is the number of individuals observed over the course of the time period/block when individual j was also observed minus 1 (because individual j could not be in contact with itself).
}
if(distFunction == "dist2Area_df"){ #if the dist2Area_df function was used to generate x, then 1 does not need to be subtracted here
potentialDegree <- ifelse(nrow(idSub) > 0, length(potentialDegreeIdentifier), 0) #if the individual WAS present the maximum potential degree is the number of individuals observed over the course of the time period/block when individual j was also observed.
}
if(nrow(idSub) > 0){ #for some reason, if and else statements kept returning an error, so I was forced to use 2 if statements instead
potentialIndivDurations <- unname(apply(idSub[,grep("dist.to.", colnames(idSub))], 2, function(x){length(which(is.na(x) == FALSE))})) #This means if the individual WAS present the max number of durations potentially observed is the number of TSWs both individuals (or an individual and fixed area) were observed at the same time.
}
if(nrow(idSub) == 0){
potentialIndivDurations <- rep(0, length(4:ncol(outputMat)))
}

potentialDurations <- sum(as.integer(potentialIndivDurations)) #if the individual WAS present the maximum potential contact durations is the sum of all individuals observed at each time step during the time period/block

outputMat[1,(2:ncol(outputMat))] <- c(potentialDegree, potentialDurations, potentialIndivDurations)

outputFrame <- data.frame(outputMat, stringsAsFactors = TRUE) #convert to a data frame to allow storage of multiple data types (the ids will be character strings)
outputFrame$id <- j #define the id return(outputFrame) } potentialDurationsBlock<-data.frame(data.table::rbindlist(fe1), stringsAsFactors = TRUE) #bind the data together potentialDurationsBlock$block <- unique(blockSub$block) #define block info potentialDurationsBlock$block.start <- unique(blockSub$block.start) #define block info potentialDurationsBlock$block.end <- unique(blockSub$block.end) #define block info }, envir = environmentTag) return(potentialDurationsBlock) #note that all the other processes took place in the master-function frame, so we can just return NULL here. } potentialDurationsFrame<-data.frame(data.table::rbindlist(foreach::foreach(l = unique(x$block)) %do% blockBreak.function(l)), stringsAsFactors = TRUE)
potentialDurationsFrame<- potentialDurationsFrame[order(potentialDurationsFrame$block, potentialDurationsFrame$id),] #order the final output by block and id

}else{ #if blocking == FALSE

idSeq<- unique(x$id) dist.colnames <-substring(colnames(x)[grep("dist.to.", colnames(x))],9) #Note specific id in "dist.to." colnames start at character 9. This will be different depending on if dist2all or dist2area was used dist.colnames_modified <- gsub("indiv_","",dist.colnames) #remove "indiv_" from dist2all output fe1 <- foreach::foreach(j = idSeq, .noexport = c("idSub", "outputMat")) %do% { idSub <- droplevels(x[which(x$id == j),]) #subset dist.input to only contain the id-value of interest
outputMat<- matrix(nrow = 1, ncol = (3 + length(grep("dist.to.", colnames(x))))) #set up the empty matrix to hold the output data.
colnames(outputMat)<- c("id", "potenDegree", "potenTotalContactDurations", paste("potenContactDurations_", dist.colnames_modified, sep =""))

#if the individual was not present (i.e., nrow(idSub) == 0), then 0s well be put in the matrix.  However, how many potential nodes present in the block takes a bit more effort to calculate because the distance input object may have come from dist2All_df or dist2Area_df.

potentialDegreeIdentifier <-NULL #create an empty vector to describe when nodes are present in the data set.

for(i in 1:length(grep("dist.to.", colnames(x)))){ #loops through the potentially-contactable nodes.

presenceTest<- is.na(x[,grep("dist.to.", colnames(x))[i]]) #if nodes were not present all entries will be TRUE.

if(length(which(presenceTest == FALSE)) > 0){ #identify when the reported value is FALSE (i.e., when nodes WERE present)

potentialDegreeIdentifier <-c(potentialDegreeIdentifier, 1) #if nodes are present add 1 value to potentialDegreeIdentifier

}else{next}

}
if(distFunction == "dist2All_df"){ #if the dist2All_df function was used to generate x
potentialDegree <- ifelse(nrow(idSub) > 0, (length(potentialDegreeIdentifier) - 1), 0) #if the individual WAS present the maximum potential degree is the number of individuals observed over the course of the time period/block when individual j was also observed minus 1 (because individual j could not be in contact with itself).
}
if(distFunction == "dist2Area_df"){ #if the dist2Area_df function was used to generate x, then 1 does not need to be subtracted here
potentialDegree <- ifelse(nrow(idSub) > 0, length(potentialDegreeIdentifier), 0) #if the individual WAS present the maximum potential degree is the number of individuals observed over the course of the time period/block when individual j was also observed.
}

if(nrow(idSub) > 0){ #for some reason, if and else statements kept returning an error, so I was forced to use 2 if statements instead
potentialIndivDurations <- unname(apply(idSub[,grep("dist.to.", colnames(idSub))], 2, function(x){length(which(is.na(x) == FALSE))})) #This means if the individual WAS present the max number of durations potentially observed is the number of TSWs both individuals (or an individual and fixed area) were observed at the same time.
}
if(nrow(idSub) == 0){
potentialIndivDurations <- rep(0, length(4:ncol(outputMat))) #just zeroes
}

potentialDurations <- sum(as.integer(potentialIndivDurations)) #if the individual WAS present the maximum potential contact durations is the sum of all individuals observed at each time step during the time period/block

outputMat[1,(2:ncol(outputMat))] <- c(potentialDegree, potentialDurations, potentialIndivDurations)

outputFrame <- data.frame(outputMat, stringsAsFactors = TRUE) #convert to a data frame to allow storage of multiple data types (the ids will be character strings)
outputFrame$id <- j #define the id return(outputFrame) } potentialDurationsFrame<-data.frame(data.table::rbindlist(fe1), stringsAsFactors = TRUE) #bind the data together potentialDurationsFrame<- potentialDurationsFrame[order(potentialDurationsFrame$id),] #order the final output by id
rm(fe1) #remove to free up memory

}

#assign("potentialDurationsFrame", potentialDurations, envir = subEnvir1) #bring the function output back into the apply environment

}, envir = environmentTag)
return(potentialDurationsFrame)
}

potentialDurationsList<- foreach::foreach(m = seq(1:length(y))) %do% listBreak.function(m)
#potentialDurationsList<- apply(listBreakFrame, 1, listBreak.function, environmentTag = thisEnvironment)
return(potentialDurationsList)

}else{ #if x input is a single data frame

if(blocking == TRUE){

#the code immediately below comes from the contact::timeBlock.append function. We just refrain from calling it here to prevent the inputs being cloned within the function and because we don't need ALL the info that particular function would append to x. Thus, we save time and memory.

if(blockUnit == "Secs" || blockUnit == "SECS" || blockUnit == "secs"){
blockLength1 <- blockLength
}
if(blockUnit == "Mins" || blockUnit == "MINS" || blockUnit == "mins"){
blockLength1 <- blockLength*60 #num seconds in a minute
}
if(blockUnit == "Hours" || blockUnit == "HOURS" || blockUnit == "hours"){
blockLength1 <- blockLength*60*60 #num seconds in an hour
}
if(blockUnit == "Days" || blockUnit == "DAYS" || blockUnit == "days"){
blockLength1 <- blockLength*60*60*24 #num seconds in a day
}
if(blockUnit == "Weeks" || blockUnit == "WEEKS" || blockUnit == "weeks"){
blockLength1 <- blockLength*60*60*24*7 #num seconds in a week
}

lub.hours = lubridate::hour(x$dateTime) lub.dates = lubridate::date(x$dateTime)

daySecondList = lub.hours * 3600 + lubridate::minute(x$dateTime) * 60 + lubridate::second(x$dateTime) #This calculates a day-second
x<-x[order(lub.dates, daySecondList),] #in case this wasn't already done, we order by date and second. Note that we must order it in this round-about way (using the date and daySecond vectors) to prevent ordering errors that sometimes occurs with dateTime data

if(length(blockingStartTime) == 1){ #if the blockingStartTime argument is defined, we calculate how far it is away (in seconds) from the minimum timepoint in x

blockTimeAdjustment <- difftime(x$dateTime[1], blockingStartTime, units = c("secs")) }else{ #if the blockingStartTime argument is NOT defined, the adjustment is 0 blockTimeAdjustment <- 0 } #for some odd reason, difftime will output mostly zeroes (incorrectly) if there are > 1 correct 0 at the beginning. We use a crude fix here to address this. Basically, we create the zeroes first and combine it with other values afterwards totSecond <- rep(0, length(which(x$dateTime == x$dateTime[1]))) if(nrow(x) > length(totSecond)){ totSecond2<-as.integer(difftime(x$dateTime[(length(totSecond) +1): nrow(x)] ,x$dateTime[1], units = c("secs"))) }else{ totSecond2 <- NULL } studySecond <- as.integer((c(totSecond, totSecond2) -min(c(totSecond, totSecond2))) + 1) + blockTimeAdjustment numblocks <- as.integer(ceiling((max(studySecond) - 1)/blockLength1)) block <-rep(0,length(studySecond)) for(g in 1:(numblocks -1)){ #numblocks - 1 because the last block in the dataset may be smaller than previous blocks (if blockLength1 does not divide evenly into timedif) block[which(studySecond >= ((g-1)*blockLength1 + 1) & studySecond <= (g*blockLength1))] = g } if(length(which(block == 0)) > 0){ #identifies the last block block[which(block == 0)] = numblocks } block.start<-as.character((as.POSIXct(x$dateTime[1]) - blockTimeAdjustment) + ((block - 1)*blockLength1)) #identify the timepoint where each block starts (down to the second resolution)
block.end<-as.character((as.POSIXct(x$dateTime[1]) - blockTimeAdjustment) + ((block - 1)*blockLength1) + (blockLength1 -1)) #identify the timepoint where each block ends (down to the second resolution) x$block <- block
x$block.start <- block.start x$block.end <- block.end

rm(list = c("daySecondList", "block.start","block.end", "lub.dates", "lub.hours", "numblocks", "totSecond", "totSecond2", "studySecond", "block")) #remove these objects because they are no longer needed.

#now that we've assigined the block information, we can begin processing the data in earnest

idSeq<- unique(x$id) blockSeq <- unique(x$block)
dist.colnames <-substring(colnames(x)[grep("dist.to.", colnames(x))],9) #Note specific id in "dist.to." colnames start at character 9. This will be different depending on if dist2all or dist2area was used
dist.colnames_modified <- gsub("indiv_","",dist.colnames) #remove "indiv_" from dist2all output

blockBreak.function <-function(l, environmentTag = thisEnvironment, columnNames = dist.colnames_modified){

#browser()
#Because the input data files can be quite large, we do not want to clone it excessively by running subfunctions. So, we relate the following expression to the previously-generated environment (where the data already exist)
assign("breakBlock", unname(unlist(l)), envir = environmentTag)

eval(expr = {
blockSub <- droplevels(x[which(x$block == breakBlock),]) #subset the data set by block fe1 <- foreach::foreach(j = idSeq, .noexport = c("idSub", "outputMat")) %do% { idSub <- droplevels(blockSub[which(blockSub$id == j),]) #subset dist.input to only contain the id-value of interest
outputMat<- matrix(nrow = 1, ncol = (3 + length(grep("dist.to.", colnames(blockSub))))) #set up the empty matrix to hold the output data.
colnames(outputMat)<- c("id", "potenDegree", "potenTotalContactDurations", paste("potenContactDurations_", columnNames, sep =""))

#if the individual was not present (i.e., nrow(idSub) == 0), then 0s well be put in the matrix
potentialDegreeIdentifier <-NULL #create an empty vector to describe when nodes are present in the data set.

for(i in 1:length(grep("dist.to.", colnames(blockSub)))){ #loops through the potentially-contactable nodes.

presenceTest<- is.na(blockSub[,grep("dist.to.", colnames(blockSub))[i]]) #if nodes were not present all entries will be TRUE.

if(length(which(presenceTest == FALSE)) > 0){ #identify when the reported value is FALSE (i.e., when nodes WERE present)

potentialDegreeIdentifier <-c(potentialDegreeIdentifier, 1) #if nodes are present add 1 value to potentialDegreeIdentifier

}else{next}

}
if(distFunction == "dist2All_df"){ #if the dist2All_df function was used to generate x
potentialDegree <- ifelse(nrow(idSub) > 0, (length(potentialDegreeIdentifier) - 1), 0) #if the individual WAS present the maximum potential degree is the number of individuals observed over the course of the time period/block when individual j was also observed minus 1 (because individual j could not be in contact with itself).
}
if(distFunction == "dist2Area_df"){ #if the dist2Area_df function was used to generate x, then 1 does not need to be subtracted here
potentialDegree <- ifelse(nrow(idSub) > 0, length(potentialDegreeIdentifier), 0) #if the individual WAS present the maximum potential degree is the number of individuals observed over the course of the time period/block when individual j was also observed.
}
if(nrow(idSub) > 0){ #for some reason, if and else statements kept returning an error, so I was forced to use 2 if statements instead
potentialIndivDurations <- unname(apply(idSub[,grep("dist.to.", colnames(idSub))], 2, function(x){length(which(is.na(x) == FALSE))})) #This means if the individual WAS present the max number of durations potentially observed is the number of TSWs both individuals (or an individual and fixed area) were observed at the same time.
}
if(nrow(idSub) == 0){
potentialIndivDurations <- rep(0, length(4:ncol(outputMat)))
}

potentialDurations <- sum(as.integer(potentialIndivDurations)) #if the individual WAS present the maximum potential contact durations is the sum of all individuals observed at each time step during the time period/block

outputMat[1,(2:ncol(outputMat))] <- c(potentialDegree, potentialDurations, potentialIndivDurations)

outputFrame <- data.frame(outputMat, stringsAsFactors = TRUE) #convert to a data frame to allow storage of multiple data types (the ids will be character strings)
outputFrame$id <- j #define the id return(outputFrame) } potentialDurationsBlock<-data.frame(data.table::rbindlist(fe1), stringsAsFactors = TRUE) #bind the data together potentialDurationsBlock$block <- unique(blockSub$block) #define block info potentialDurationsBlock$block.start <- unique(blockSub$block.start) #define block info potentialDurationsBlock$block.end <- unique(blockSub$block.end) #define block info }, envir = environmentTag) return(potentialDurationsBlock) #note that all the other processes took place in the master-function frame, so we can just return NULL here. } potentialDurationsFrame<-data.frame(data.table::rbindlist(foreach::foreach(l = unique(x$block)) %do% blockBreak.function(l)), stringsAsFactors = TRUE)
potentialDurationsFrame<- potentialDurationsFrame[order(potentialDurationsFrame$block, potentialDurationsFrame$id),] #order the final output by block and id

}
else{ #if blocking == FALSE

idSeq<- unique(x$id) dist.colnames <-substring(colnames(x)[grep("dist.to.", colnames(x))],9) #Note specific id in "dist.to." colnames start at character 9. This will be different depending on if dist2all or dist2area was used dist.colnames_modified <- gsub("indiv_","",dist.colnames) #remove "indiv_" from dist2all output fe1 <- foreach::foreach(j = idSeq) %do% { idSub <- droplevels(x[which(x$id == j),]) #subset dist.input to only contain the id-value of interest
outputMat<- matrix(nrow = 1, ncol = (3 + length(grep("dist.to.", colnames(x))))) #set up the empty matrix to hold the output data.
colnames(outputMat)<- c("id", "potenDegree", "potenTotalContactDurations", paste("potenContactDurations_", dist.colnames_modified, sep =""))

#if the individual was not present (i.e., nrow(idSub) == 0), then 0s well be put in the matrix
potentialDegreeIdentifier <-NULL #create an empty vector to describe when nodes are present in the data set.

for(i in 1:length(grep("dist.to.", colnames(x)))){ #loops through the potentially-contactable nodes.

presenceTest<- is.na(x[,grep("dist.to.", colnames(x))[i]]) #if nodes were not present all entries will be TRUE.

if(length(which(presenceTest == FALSE)) > 0){ #identify when the reported value is FALSE (i.e., when nodes WERE present)

potentialDegreeIdentifier <-c(potentialDegreeIdentifier, 1) #if nodes are present add 1 value to potentialDegreeIdentifier

}else{next}

}
if(distFunction == "dist2All_df"){ #if the dist2All_df function was used to generate x
potentialDegree <- ifelse(nrow(idSub) > 0, (length(potentialDegreeIdentifier) - 1), 0) #if the individual WAS present the maximum potential degree is the number of individuals observed over the course of the time period/block when individual j was also observed minus 1 (because individual j could not be in contact with itself).
}
if(distFunction == "dist2Area_df"){ #if the dist2Area_df function was used to generate x, then 1 does not need to be subtracted here
potentialDegree <- ifelse(nrow(idSub) > 0, length(potentialDegreeIdentifier), 0) #if the individual WAS present the maximum potential degree is the number of individuals observed over the course of the time period/block when individual j was also observed.
}

if(nrow(idSub) > 0){ #for some reason, if and else statements kept returning an error, so I was forced to use 2 if statements instead
potentialIndivDurations <- unname(apply(idSub[,grep("dist.to.", colnames(idSub))], 2, function(x){length(which(is.na(x) == FALSE))})) #This means if the individual WAS present the max number of durations potentially observed is the number of TSWs both individuals (or an individual and fixed area) were observed at the same time.
}
if(nrow(idSub) == 0){
potentialIndivDurations <- rep(0, length(4:ncol(outputMat)))
}

potentialDurations <- sum(as.integer(potentialIndivDurations)) #if the individual WAS present the maximum potential contact durations is the sum of all individuals observed at each time step during the time period/block

outputMat[1,(2:ncol(outputMat))] <- c(potentialDegree, potentialDurations, potentialIndivDurations)
outputFrame <- data.frame(outputMat, stringsAsFactors = TRUE) #convert to a data frame to allow storage of multiple data types (the ids will be character strings)
outputFrame$id <- j #define the id return(outputFrame) } potentialDurationsFrame<-data.frame(data.table::rbindlist(fe1), stringsAsFactors = TRUE) #bind the data together potentialDurationsFrame<- potentialDurationsFrame[order(potentialDurationsFrame$id),] #order the final output by id
rm(fe1) #remove to free up memory

}
return(potentialDurationsFrame)
}
}


## Try the contact package in your browser

Any scripts or data that you put into this service are public.

contact documentation built on May 17, 2021, 5:07 p.m.