R/variousOtherHelperFunctions.R

Defines functions surveyReport_legacy intersect.Interval.fast camopPlot fractionOfDay addVideoHierarchicalSubject addVideoDateTimeOriginal processVideoArgument digiKamVideoHierarchicalSubject accessDigiKamDatabase makeProgressbar padMatrixWithNA deparseCamOpRownames stationSessionCamMatrix parseDateTimeObject parseDateObject dataFrameTibbleCheck assignSessionIDtoRecordTable assignSessionIDtoRecordTableIndividual makeSurveyZip calculateTrappingEffort cleanSubsetSpecies adjustCameraOperationMatrix createDateRangeTable checkCamOpColumnNames addNewColumnsToGlobalTable assessTemporalIndependence removeDuplicatesOfRecords addStationCameraID separateMultipleSpecies assignSpeciesID subset_taggroup addMetadataAsColumns runExiftool checkForSpacesInColumnNames .pkgVersionCRAN

# Version Check for .onAttach()  ####
# adapted from http://thecoatlessprofessor.com/programming/automatically-check-if-r-package-is-the-latest-version-on-package-load/. Thank you!


.pkgVersionCRAN <- function(pkg, cran_url="http://cran.r-project.org/web/packages/")
{
  # Create URL
  cran_pkg_loc <- paste0(cran_url,pkg)
  
  # Try to establish a connection
  suppressWarnings( conn <- try( url(cran_pkg_loc) , silent=TRUE ) )
  
  # If connection, try to parse values, otherwise return NULL
  if ( all( class(conn) != "try-error") ) {
    suppressWarnings( cran_pkg_page <- try( readLines(conn) , silent=TRUE ) )
    close(conn)
  } else {
    return(NULL)
  }
  
  # Extract version info
  version_line = cran_pkg_page[grep("Version:",cran_pkg_page)+1]
  gsub("<(td|\\/td)>","",version_line)
}


# for all functions in which user specifies column names: error if spaces in column names    ####
checkForSpacesInColumnNames <- function(...){
  
  z <- list(...)
  
  # if all arguments are of length 1, do
  if(all(sapply(z, FUN = length) == 1)){
    if(any(grepl(pattern = " ", x = unlist(z), fixed = TRUE))) stop("column names may not contain spaces: \n ",
                                                                    paste(names(z)[which(grepl(pattern = " ", x = unlist(z), fixed = TRUE))], "=",
                                                                          z[which(grepl(pattern = " ", x = unlist(z), fixed = TRUE))], collapse = "\n "),
                                                                    call. = FALSE)
  }
  
  # if the argument is of length >1, do
  if(any(sapply(z, FUN = length) > 1)){
    if(length(z) == 1) {
      if(any(grepl(pattern = " ", x = unlist(z[[1]]), fixed = TRUE))) stop("column names in '", names(z) ,"' may not contain spaces: \n ",
                                                                           paste(names(unlist(z))[which(grepl(pattern = " ", x = unlist(z), fixed = TRUE))], "=",
                                                                                 z[[1]][which(grepl(pattern = " ", x = unlist(z), fixed = TRUE))], collapse = "\n "),
                                                                           call. = FALSE)
    } 
    if(length(z) > 1) {
      which_is_the_culprit <- which(sapply(z, FUN = length) > 1)
      stop(paste("Argument", names(z)[which_is_the_culprit], "is not of length 1"), call. = FALSE)
    }
  }
}



# run exiftool for functions reading out and tabulating image metadata    ####

runExiftool <- function(command.tmp,
                        colnames.tmp)
{
  tmp1 <- strsplit(system(command.tmp, intern=TRUE), split = "\t")
  
  if(length(tmp1) == 0) return(NULL) # if nothing returned (no images, no metadata)
  if(length(tmp1) == 1) {
    if(grepl(tmp1[[1]], pattern = "^Error: ", perl = TRUE)){
      warning(paste(unlist(tmp1)), call. = FALSE)
      return(NULL)
      }
    }
  # if first entry is exiftool warning about FileName encoding remove (happens when there's special characters in directory name)
  if(any(grepl(pattern = "FileName encoding not specified", tmp1[[1]]))) tmp1[[1]] <- NULL
  
  metadata.tmp <- as.data.frame(matrix(unlist(lapply(tmp1, FUN = function(X){X[2]})),
                                       ncol = length(colnames.tmp),
                                       byrow = TRUE),
                                stringsAsFactors = FALSE)
  colnames(metadata.tmp) <- colnames.tmp
  
  # find and remove ._ files created on Macs
  strangeMacFiles <- grep("^[._]", metadata.tmp$FileName, fixed = FALSE)
  if(length(strangeMacFiles) >= 1)  {
    warning(paste("found", length(strangeMacFiles), "JPG files beginning with '._' in", paste(unique(metadata.tmp$Directory[strangeMacFiles]), collapse = "\n"), ". \nThese images will be ignored."), call. = FALSE, immediate. = TRUE)
    metadata.tmp <- metadata.tmp[-strangeMacFiles,]
  }
  return(metadata.tmp)
}


# add image metadata as columns in record table ####
addMetadataAsColumns <- function(intable,
                                 metadata.tagname,
                                 metadataHierarchyDelimitor,
                                 multiple_tag_separator)
{
  intable[,metadata.tagname] <- as.character(intable[,metadata.tagname])
  tmp2 <- strsplit(intable[,metadata.tagname], split = ",")      # split items of "HierarchicalSubject" at comma
  tmp3 <- lapply(tmp2, FUN = function(X){X[grep(pattern = metadataHierarchyDelimitor, x = X, fixed = TRUE)]})   # get only the ones with values
  
  
  # find all metadata categories, remove spaces
  list.tmp <- vector()
  
  # loop over images, get name of tag groups
    for(xy in 1:length(tmp3)){
    list.tmp <- c(list.tmp, gsub(pattern = " ",
                                 replacement = "",
                                 x =  sapply(strsplit(tmp3[[xy]],
                                                             split = metadataHierarchyDelimitor,
                                                             fixed = TRUE),
                                                    FUN = function(Y){Y = Y[1]})))
  }
  cols2add <- unique(list.tmp)    # these are the columns to add
  
  # add empty columns
  if(length(cols2add) >= 1){    # if anything to add
    intable <- data.frame(intable, 
                          matrix(NA, ncol = length(cols2add), nrow = nrow(intable)), 
                          check.names = FALSE)
    colnames(intable)[seq((ncol(intable) - length(cols2add) + 1),ncol(intable))] <- cols2add
    
    
    # fill metadata columns
    for(index_new_col in 1:length(cols2add)){                 # loop over metadata tag groups
      
      taggroup <- paste0(cols2add[index_new_col],
                         metadataHierarchyDelimitor)
      

      for(img in 1:length(tmp3)) {
        
        current <- tmp3[[img]]
        if(length(current) == 1) if(current == "") intable[img, cols2add[index_new_col]] <- NA
        
        # remove leading space
        current <- gsub("^ ", "", current)
        
        # subset to current tag group
        current_subset <- subset_taggroup(current, taggroup)
        
        use_these <- vector(length = length(current_subset))
        # check if current tag is included in another tag at a deeper level in the hierarchy
        for(tag in 1:length(current_subset)){
          use_these[tag] <- ifelse(is.na(charmatch(current_subset[tag], current_subset[-tag])), TRUE, FALSE)
        }
          
          #get the deepest hierarchical level of the tag
          current_subset_split <- sapply(current_subset[use_these], 
                                         strsplit,
                                         split = metadataHierarchyDelimitor,
                                         fixed = TRUE)
          tags_to_use <- sapply(current_subset_split, FUN = function(x) x [length(x)]) 
          intable[img, cols2add[index_new_col]] <- paste(tags_to_use, collapse = multiple_tag_separator)
          
          rm(tags_to_use, current_subset_split, current_subset, current)
        }
    } # end for index_new_col
  }  # end if(length(cols2add) >= 1){ 
  
  which_cols_to_rename <- which(colnames(intable) %in% cols2add)
  
  # rename metadata columns with prefix "metadata_"
  colnames(intable)[which_cols_to_rename] <- paste("metadata_", colnames(intable)[which_cols_to_rename], sep = "")
  
  return(intable)
}




subset_taggroup <- function(x, parent) {
  x[grep(x = x,
         pattern = parent,
         fixed = TRUE)]
}




# assign species IDs from metadata tags or directory names   ####
assignSpeciesID <- function(intable,
                            IDfrom,
                            metadataSpeciesTag,
                            speciesCol,
                            dirs_short,
                            i_tmp,
                            multiple_tag_separator,
                            returnFileNamesMissingTags,
                            parent = "recordTable")
{
  
  if(parent == "recordTable") {
    parent_name <- "species"
    parent_argname <- "metadataSpeciesTag"
  }
  if(parent == "recordTableIndividual") {
    parent_name <- "individual" 
    parent_argname <- "metadataIDTag"
  }
  
  file.sep <- .Platform$file.sep
  
  
  if(IDfrom == "directory"){
    intable[,speciesCol] <-  sapply(strsplit(intable$Directory, split = file.sep, fixed = TRUE), FUN = function(X){X[length(X)]})
    return(intable)
  } else {           # if IDfrom = "metadata"
    if(hasArg(metadataSpeciesTag)){
      metadataSpeciesTag2 <- paste("metadata", metadataSpeciesTag, sep = "_")
      
      # if the metadata_Species tag is found in metadata.tmp1
      if(metadataSpeciesTag2 %in% colnames(intable)){
        
        # copy to species column proper
        intable[,speciesCol] <- intable[,metadataSpeciesTag2]
        
        # find records without proper species tag, to be removes
        species_records_to_remove <- which(is.na(intable[,speciesCol]))
        
        # if there's records to remove
        if(length(species_records_to_remove) >= 1){    
          
          # give warnings
          if(isTRUE(returnFileNamesMissingTags)){
            warning(paste(paste( dirs_short[i_tmp],":  removed", length(species_records_to_remove), "images out of", nrow(intable),
                                 "because of missing", parent_name, "metadata tag:\n"),
                          paste(head(paste(intable$Directory[species_records_to_remove], intable$FileName[species_records_to_remove], sep = file.sep)), collapse = "\n")),
                    call. = FALSE, immediate. = TRUE)
          } else {
            warning(paste(paste( dirs_short[i_tmp],":  removed", length(species_records_to_remove), "images out of", nrow(intable),
                                 "because of missing", parent_name, "metadata tag")),
                    call. = FALSE, immediate. = TRUE)
          }
          #remove records without species tag
          intable <- intable[-species_records_to_remove,]      
        }
        
        # duplicate records with multiple species (separate row for each species)
        intable <- separateMultipleSpecies (intable                = intable,
                                            speciesCol             = speciesCol,
                                            multiple_tag_separator = multiple_tag_separator)
        
        return(intable)
      } else {
        warning(paste(dirs_short[i_tmp], ":   metadataSpeciesTag '", metadataSpeciesTag, "' not found in image metadata tag 'HierarchicalSubject'.", 
                      ifelse(grepl(" ", metadataSpeciesTag, fixed = T), paste0("This is probably because of an empty space in '", metadataSpeciesTag, "'"), ""), 
                      sep = ""), call. = FALSE, immediate. = TRUE)
        
        return("found no species tag")
      }
    } else {
      stop(paste("station", dirs_short[i_tmp], ":   cannot figure out", parent_name, "names. Is", parent_argname, "defined?"), call. = FALSE)
    }
  }
}


# find and separate multiple species in same image (only if using metadata ID)    ####
separateMultipleSpecies <- function(intable,
                                    speciesCol,
                                    multiple_tag_separator)
{
  
  records0                 <- intable[,speciesCol]
  records_duplicate        <- strsplit(intable[,speciesCol], split = multiple_tag_separator, fixed = TRUE)
  records_duplicate_length <- sapply(records_duplicate, length)
  
  if(any(records_duplicate_length > 1)){
    intable <- intable[rep(row.names(intable), records_duplicate_length), ]                # replicate rows with >1 species / individual
    intable[,speciesCol] <- unlist(strsplit (records0, split = multiple_tag_separator))    # assign species anew
  }
  return(intable)
}




# add station and camera id to metadata table   ####

addStationCameraID <- function(intable,
                               dirs_short,
                               stationCol,
                               cameraCol,
                               cameraID,
                               hasStationFolders,
                               i,
                               IDfrom)
{
  
  file.sep <- .Platform$file.sep
  
  # append station ID
  
  if(isTRUE(hasStationFolders)) {       # take station ID from station directories
    intable <- cbind(intable, dirs_short[i])
    colnames(intable)[ncol(intable)] <- stationCol
    
  } else  {                             # take station ID from image filenames
    
    station.tmp  <- try(sapply(strsplit(as.character(intable$FileName), split = "__"), FUN = function(X){X[1]}))      # assumes filenames: STATION__Camera__Date/Time(Number).JPG)
    if(length(station.tmp) == nrow(intable)){
      intable <- cbind(intable, station.tmp)
      colnames(intable)[ncol(intable)] <- stationCol
    } else {
      stop(paste(dirs_short[i], ": numbers of images and station ID extracted from image names do not match. Do image filenames begin with station IDs?"))
    }
  }
  
  # append camera ID
  
  if(hasArg(cameraID)){
    if(cameraID == "filename"){
      camera.tmp  <- try(sapply(strsplit(as.character(intable$FileName), split = "__"), FUN = function(X){X[2]}))      # assumes filenames: Station__CAMERA__Date/Time(Number).JPG)
      if(length(camera.tmp) == nrow(intable)){
        intable <- cbind(intable, camera.tmp)
        colnames(intable)[ncol(intable)]     <- cameraCol
      }
    }
    if(cameraID == "directory"){            # this can only happen in recordTable. Not in recordTableIndividual
      if(IDfrom == "directory"){             # assumes directory structure: Station/Camera/Species
        intable <- cbind(intable,
                         sapply(strsplit(intable$Directory, split = file.sep, fixed = TRUE), FUN = function(X){X[length(X) - 1]}))
      } else {                                    # assumes directory structure: Station/Camera
        intable <- cbind(intable,
                         sapply(strsplit(intable$Directory, split = file.sep, fixed = TRUE), FUN = function(X){X[length(X)]}))
      }
      colnames(intable)[ncol(intable)]     <- cameraCol
    }
  }
  return(intable)
}

# check if date/time information is present and was readable    ####

checkDateTimeOriginal <- function (intable, dirs_short, i){
  # if all date/time information is missing, go to next station
  if(all(intable$DateTimeOriginal == "-")){
    warning(paste(dirs_short[i], ": no readable date/time information. Skipping"), call. = FALSE,  immediate. = TRUE)
    intable <- NULL
  } else {
    
    # if date/time information is missing for some records only
    if(any(intable$DateTimeOriginal == "-")){
      which_no_time <- which(intable$DateTimeOriginal == "-")
      warning(paste(dirs_short[i], ": omitting", length(which_no_time), "images because of missing/unreadable date/time information."), call. = FALSE,  immediate. = FALSE)
      intable <-  intable[-which_no_time,]    # removing rows with missing date/time information
    }
  }
  return(intable)
}

# remove duplicate records of same species taken in same second at the same station (by the same camera, if relevant)   ####
# Note to self: this may also be done outside the station loop, after the final record table is assembled. Saves a few executions of this function.
# edit 2019-12-04: but then the messages are useless, so just leave it as is for now

removeDuplicatesOfRecords <- function(metadata.tmp, 
                                      removeDuplicateRecords, 
                                      camerasIndependent, 
                                      stationCol, 
                                      speciesCol, 
                                      cameraCol, 
                                      current, 
                                      total,
                                      max_nchar_station){
  metadata.tmp0 <- metadata.tmp
  
  pb <- makeProgressbar(current = current, total = total)
  
  if(isTRUE(removeDuplicateRecords)){
    if(isTRUE(camerasIndependent)){
      remove.tmp <- which(duplicated(metadata.tmp[,c("DateTimeOriginal", stationCol, speciesCol, cameraCol)]))
      if(length(remove.tmp >= 1)){
        metadata.tmp <- metadata.tmp[-remove.tmp,]
      }
    } else {
      remove.tmp <- which(duplicated(metadata.tmp[,c("DateTimeOriginal", stationCol, speciesCol)]))
      if(length(remove.tmp >= 1)) {
        metadata.tmp <- metadata.tmp[-remove.tmp,]
      }
    }
    
    if(length(unique(metadata.tmp[,stationCol])) == 1) {                  # 1 station per exiftool call
      message(formatC(as.character(unique(metadata.tmp[,stationCol])), 
                      width = max_nchar_station, 
                      flag = "-"), ":  ",
              formatC(nrow(metadata.tmp0), width = 5), " images ", 
              formatC(length(remove.tmp),  width = 4), " duplicates removed",
              pb)
    } else {                                                               # > 1 station per exiftool call (recordTableIndividual)
      message(paste(unique(metadata.tmp[,stationCol]), collapse = ", "), ":  ",
              formatC(nrow(metadata.tmp0), width = 5), " images ", 
              formatC(length(remove.tmp),  width = 4), " duplicates removed",
              pb)
    }
  } else {
    if(length(unique(metadata.tmp[,stationCol])) == 1) {                  # 1 station per exiftool call
      message(formatC(as.character(unique(metadata.tmp[,stationCol])), 
                      width = max_nchar_station, 
                      flag = "-"), ":  ",
              formatC(nrow(metadata.tmp0), width = 5), " images", 
              pb)
    } else {                                                               # > 1 station per exiftool call (recordTableIndividual)
      message(paste(unique(metadata.tmp[,stationCol]), collapse = ", "), ":  ", 
              formatC(nrow(metadata.tmp0), width = 5), " images", 
              pb)
    }
  }
  return(metadata.tmp)
}


# assess temporal independence between records   ####

assessTemporalIndependence <- function(intable,
                                       deltaTimeComparedTo,
                                       columnOfInterest,     # species/individual column
                                       cameraCol,
                                       camerasIndependent,
                                       stationCol,
                                       minDeltaTime,
                                       eventSummaryColumn,
                                       eventSummaryFunction)
{
  # check if all Exif DateTimeOriginal tags were read correctly
  if(any(is.na(intable$DateTimeOriginal))){
    which.tmp <- which(is.na(intable$DateTimeOriginal))
    if(length(which.tmp) == nrow(intable)) stop("Could not read any Exif DateTimeOriginal tag at station: ", paste(unique(intable[which.tmp, stationCol])), " Consider checking for corrupted Exif metadata.")
    warning(paste("Could not read Exif DateTimeOriginal tag of", length(which.tmp),"image(s) at station", paste(unique(intable[which.tmp, stationCol]), collapse = ", "), ". Will omit them.\nConsider checking for corrupted Exif metadata. Or does your selected time zone have daylight saving time and the image(s) fall in the misisng hour at spring formward (cameras don't usually record DST)?. \n",
                  paste(file.path(intable[which.tmp, "Directory"],
                                  intable[which.tmp, "FileName"]), collapse = "\n")), call. = FALSE, immediate. = TRUE)
    intable <- intable[-which.tmp ,]
    rm(which.tmp)
  }
  
  # prepare to add time difference between observations columns
  intable <- data.frame(intable,
                        delta.time.secs  = NA,
                        delta.time.mins  = NA,
                        delta.time.hours = NA,
                        delta.time.days  = NA,
                        independent      = ifelse(minDeltaTime == 0, TRUE, NA),   # all independent if no temporal filtering
                        stringsAsFactors = FALSE,
                        check.names      = FALSE)        # to prevent ":" being converted to ".", e.g. in EXIF:Make
  
  # sort records by station, species, then time
  intable <- intable[order(intable[, stationCol], intable[, columnOfInterest], intable$DateTimeOriginal),]
  
  for(xy in 1:nrow(intable)){     # for every record
    
    
    which.columnOfInterest <- which(intable[, columnOfInterest]  == intable[xy, columnOfInterest])          # same species/individual
    which.stationCol       <- which(intable[, stationCol]        == intable[xy, stationCol])                # at same station
    which.independent      <- which(intable$independent          == TRUE)                                   # independent (first or only record of a species at a station)
    which.earlier          <- which(intable$DateTimeOriginal     <  intable$DateTimeOriginal[xy])          # earlier than record xy (takes long)
    #which.earlier          <- 1: (xy-1)                                                                  # earlier than record xy  (fast alternative, relies on table being sorted by date/time before anything else)
    if(camerasIndependent) {
      which.cameraCol      <- which(intable[, cameraCol]  == intable[xy, cameraCol])                        # at same camera
    }
    
    # set independent = TRUE and delta.time = 0 if it is the 1st/only  record of a species / individual
    
    if(camerasIndependent == TRUE){
      which.tmp <- Reduce(intersect, list(which.columnOfInterest, 
                                          which.stationCol, 
                                          which.cameraCol))
      if(intable$DateTimeOriginal[xy]  == min(intable$DateTimeOriginal[which.tmp])){    # cameras at same station assessed independently
        intable$independent[xy]       <- TRUE
        intable$delta.time.secs[xy]   <- 0
      }
    } else {
      which.tmp <- Reduce(intersect, list(which.columnOfInterest, 
                                          which.stationCol))
      if(intable$DateTimeOriginal[xy]  == min(intable$DateTimeOriginal[which.tmp])){
        intable$independent[xy]       <- TRUE
        intable$delta.time.secs[xy]   <- 0
      }
    }
    
    # calculate time difference to previous records of same species at this station (if not the 1st/only record)
    if(is.na(intable$delta.time.secs[xy])) {
      
      if(deltaTimeComparedTo == "lastIndependentRecord"){
        
        if(camerasIndependent == TRUE){
          which_time2 <- Reduce(intersect, list(which.columnOfInterest, 
                                              which.stationCol,
                                              which.cameraCol,
                                              which.independent,
                                              which.earlier))
        } else {
          
          which_time2 <- Reduce(intersect, list(which.columnOfInterest, 
                                                which.stationCol,
                                                which.independent,
                                                which.earlier))
        }
      }  else {    # if(deltaTimeComparedTo == "lastRecord"){'
        if(camerasIndependent  == TRUE){
          which_time2 <- Reduce(intersect, list(which.columnOfInterest, 
                                                which.stationCol,
                                                which.cameraCol,
                                                which.earlier))
        } else {
          which_time2 <- Reduce(intersect, list(which.columnOfInterest, 
                                                which.stationCol,
                                                which.earlier))
        }
      }
      
      # time difference to last (independent) record
      diff_tmp <- min(na.omit(difftime(time1 = intable$DateTimeOriginal[xy],            # delta time to last independent record
                                       time2 = intable$DateTimeOriginal[which_time2],
                                       units = "secs")))
      
      # save delta time in seconds
      intable$delta.time.secs[xy] <-  diff_tmp
      if(intable$delta.time.secs[xy] >= (minDeltaTime * 60) | intable$delta.time.secs[xy] == 0){
        intable$independent[xy] <- TRUE
      } else {
        intable$independent[xy] <- FALSE
      }
      
    }   # end   if(intable$DateTimeOriginal[xy] == min(...)} else {...}
  }     # end for(xy in 1:nrow(intable))
  
  # summarise some column by independent event
  
  n_imagesColumn <- "n_images"
  intable[, n_imagesColumn] <- NA
  
  which_independent <- which(intable$independent)
  
  if(hasArg(eventSummaryColumn)){
    if(all(eventSummaryColumn %in% colnames(intable))){
      if(length(eventSummaryColumn) != length(eventSummaryFunction)) stop('"eventSummaryColumn" and "eventSummaryFunction" must have same length', call. = FALSE)
      
    } else {
      warning(paste(unique(intable[, stationCol]), ": eventSummaryColumn(s) " ,
                    paste(eventSummaryColumn[!eventSummaryColumn %in% colnames(intable)], collapse = ", "),
                    " not found in column names of recordtable"), call. = FALSE)
    }
    
    summary_column_name <- paste(eventSummaryColumn, eventSummaryFunction, sep = "_")
    intable[, summary_column_name[eventSummaryColumn %in% colnames(intable)]] <- NA
    
    }    # end    if(hasArg(eventSummaryColumn)){
  
  
  for(xy in 1:length(which_independent)){     # for every independent record (the ones that end up in record table)
    
    current_row <- which_independent[xy]
    
    if(camerasIndependent){
      which_records_to_group <- which(intable[, columnOfInterest]     == intable[current_row, columnOfInterest] &   # same species
                                      intable[, stationCol]           == intable[current_row, stationCol]  &        # same station
                                      intable[, cameraCol]            == intable[current_row, cameraCol]   &        # same camera
                                      intable$DateTimeOriginal        >= intable$DateTimeOriginal[current_row] &    # later than current record
                                      !isTRUE(intable$independent))                                        # not independent
      
    } else {
      which_records_to_group <- which(intable[, columnOfInterest]     == intable[current_row, columnOfInterest] &   # same species
                                      intable[, stationCol]           == intable[current_row, stationCol]  &        # same station
                                      intable$DateTimeOriginal        >= intable$DateTimeOriginal[current_row] &
                                      !isTRUE(intable$independent))                                                 # not independent
    }
    
    # subset to records before the next independent record
    if(xy < length(which_independent)){
      which_records_to_group <- which_records_to_group[which_records_to_group < which_independent[xy + 1]] #which_records_to_group[which_records_to_group %in% seq(current_row, (which_independent[xy + 1] - 1))]
    } else {
      which_records_to_group <-  which_records_to_group[which_records_to_group <= nrow(intable)]   # which_records_to_group[which_records_to_group %in% seq(current_row, nrow(intable))]
    }
    
    if(hasArg(eventSummaryColumn)){
      
      for(eventSummaryIndex in 1:length(eventSummaryColumn)) {
        
        if(eventSummaryColumn[eventSummaryIndex] %in% colnames(intable)){
          
          summary_value <- do.call(what = eventSummaryFunction[eventSummaryIndex], 
                                   args = list(intable[which_records_to_group, eventSummaryColumn[eventSummaryIndex]], 
                                               na.rm = TRUE))
          if(!is.infinite(summary_value)){
            intable[current_row, summary_column_name[eventSummaryIndex]] <- ifelse(length(summary_value) > 1, 
                                                                                   paste(summary_value, collapse = ", "),
                                                                                   summary_value)
          }
          rm(summary_value)
        }
        
        
      }    # end for(eventSummaryIndex in 1:length(eventSummaryColumn)
    }      # end if(hasArg(eventSummaryColumn))
    
    
    # quick and dirty solution to prevent n_images = 0 when lenght(which_records_to_group) = 0 (can happen if only 1 image in a directory)
    
    if(length(which_records_to_group) == 0) {
      intable[ current_row, n_imagesColumn] <- 1
    } else {
      intable[ current_row, n_imagesColumn] <- length(which_records_to_group)
    }
    
    rm(which_records_to_group, current_row)
    
  }        # end for(xy in 1:length(which_independent))
  
  
  # keep only independent records
  outtable <- intable[intable$independent,]
  
  
  # compute delta time in hours and days
  outtable$delta.time.secs  <- round(outtable$delta.time.secs, digits = 0)
  outtable$delta.time.mins  <- round(outtable$delta.time.secs  / 60, digits = 1)
  outtable$delta.time.hours <- round(outtable$delta.time.mins  / 60, digits = 1)
  outtable$delta.time.days  <- round(outtable$delta.time.hours / 24, digits = 1)
  
  # remove "independent" column
  
  outtable <- outtable[, !colnames(outtable) %in% "independent"]
  
  return(outtable)
}


# add potential new columns to global record.table   ####

addNewColumnsToGlobalTable <- function(intable,
                                       i,
                                       record.table)
{
  
  if( nrow(record.table) >= 1){    # if there is a record table already (i.e., it is not the first station with images)
    which_cols_to_add_to_d1 <- seq(1, ncol(record.table))[-which(colnames(record.table) %in% colnames(intable))]   # columns in record.table but not in intable
    
    # if intable lacks columns present in record.table, add them here (filled with NA)
    if(length(which_cols_to_add_to_d1) >= 1){
      intable <- data.frame(intable, as.list(rep(NA, each = length(which_cols_to_add_to_d1))))
      colnames(intable)[(ncol(intable) - length(which_cols_to_add_to_d1) + 1) :  ncol(intable)] <- colnames(record.table)[which_cols_to_add_to_d1]
    }
    
    # now check which columns are present in intable but not in record.table (new tag groups) and add these (filled with NA)
    which_cols_to_add_to_record.table <- seq(1, ncol(intable))[-which(colnames(intable) %in% colnames(record.table))]  # columns present in intable but not in record.table
    if(length(which_cols_to_add_to_record.table) >= 1){
      record.table <- data.frame(record.table, 
                                 as.list(rep(NA, each = length(which_cols_to_add_to_record.table))),
                                 check.names = FALSE)
      colnames(record.table)[(ncol(record.table) - length(which_cols_to_add_to_record.table) + 1) :  ncol(record.table)] <- colnames(intable)[which_cols_to_add_to_record.table]
    }
    outtable <- intable[,match(colnames(record.table), colnames(intable))]
  } else {
    outtable <- intable
  }
  return(list(outtable, record.table))
}



#####################################################

# for detectionHistory functions

# check column names of camera operation matrix  ####
# if there's a time shift from cameraOperation, extract return number as attribute
checkCamOpColumnNames <- function(cameraOperationMatrix){
  
  if(any(is.na(colnames(cameraOperationMatrix)))) stop("There are NAs in the column names of camOp", call. = FALSE)
  
  # check if camera operration matrix has time shift
  if(all(grepl(pattern = "+", colnames(cameraOperationMatrix), fixed = TRUE))){
    colnames_as_dates <- sapply(strsplit(colnames(cameraOperationMatrix), split = "+", fixed = TRUE), FUN = function(x)x[1])
    camopTest <- try(as.Date(colnames_as_dates), silent = TRUE)
    occasionStartTime <- as.numeric(gsub("h", "", unique(sapply(strsplit(colnames(cameraOperationMatrix), split = "+", fixed = TRUE), FUN = function(x)x[2]))))
  } else {
    occasionStartTime <- 0
    camopTest <- try(as.Date(colnames(cameraOperationMatrix)), silent = TRUE)
  }
  
  if("try-error" %in% class(camopTest)) stop(paste('Could not interpret column names in camOp as Dates. Desired format is YYYY-MM-DD (e.g. "2016-12-31") YYYY-MM-DD+Xh (X being a number, e.g. "2016-12-31+12h"). First column name in your camera operation matrix is "', colnames(cameraOperationMatrix)[1], '"', sep = '' ), call. = FALSE)
  
  
  colnames(cameraOperationMatrix) <- as.character(camopTest)
  
  attr(cameraOperationMatrix, "occasionStartTime") <- occasionStartTime   # return time shift extracted from the column names
  return(cameraOperationMatrix)    
}


# create table of station / camera deployment and image data ranges  ####
createDateRangeTable <- function(cam.op,
                                 subset_species_tmp,
                                 buffer_tmp,
                                 stationCol_tmp,
                                 day1_tmp,
                                 occasionStartTime_tmp,
                                 maxNumberDays_tmp,
                                 timeZone_tmp)
{
  
  # first day of each station
  cam.tmp.min <- apply(cam.op, MARGIN = 1, function(X){min(which(!is.na(X)))})
  # last day of each station
  cam.tmp.max <- apply(cam.op, MARGIN = 1, function(X){max(which(!is.na(X)))})
  
  # date of first / last record by station
  rec.tmp.min  <- aggregate(as.Date(subset_species_tmp$DateTime2, tz = timeZone_tmp),
                            list(subset_species_tmp[,stationCol_tmp]),
                            FUN = min)
  rec.tmp.max  <- aggregate(as.Date(subset_species_tmp$DateTime2, tz = timeZone_tmp),
                            list(subset_species_tmp[,stationCol_tmp]),
                            FUN = max)
  
  
  # combine record dates and camera operation dates in one table
  date_ranges <- data.frame(rec.min = rec.tmp.min[match(rownames(cam.op), rec.tmp.min[,1]), 2],       # first record
                            rec.max = rec.tmp.max[match(rownames(cam.op), rec.tmp.max[,1]), 2],       # last record
                            cam.min = as.POSIXct(colnames(cam.op)[cam.tmp.min], tz = timeZone_tmp),   # station setup date
                            cam.max = as.POSIXct(colnames(cam.op)[cam.tmp.max], tz = timeZone_tmp)    # station retrieval date
  )
  
  rownames(date_ranges) <- rownames(cam.op)
  
  # check if images were taken between setup and retrieval dates (warning if images outside station date range)
  if(any(date_ranges$rec.min < as.Date(date_ranges$cam.min, tz = timeZone_tmp), na.rm = TRUE)){
    warning(paste("\nAt", sum(date_ranges$rec.min < as.Date(date_ranges$cam.min, tz = timeZone_tmp), na.rm = TRUE), "stations",
                  "there were records before camera operation date range: ",
                  paste(rownames(date_ranges)[which(date_ranges$rec.min < as.Date(date_ranges$cam.min, tz = timeZone_tmp))], 
                        sep = "\n", collapse = ", " )), call. = FALSE)
  }
  if(any(date_ranges$rec.max > as.Date(date_ranges$cam.max, tz = timeZone_tmp), na.rm = TRUE)) {
    warning(paste("\nAt", sum(date_ranges$rec.max > as.Date(date_ranges$cam.max, tz = timeZone_tmp), na.rm = TRUE), "stations",
                  "there were records after camera operation date range: ",
                  paste(rownames(date_ranges)[which(date_ranges$rec.max > as.Date(date_ranges$cam.max, tz = timeZone_tmp))], 
                        sep = "\n", collapse = ", " )), call. = FALSE)
  }
  
  # define when first occasion begins (to afterwards remove prior records in function    cleanSubsetSpecies)
  if(!hasArg(buffer_tmp)) buffer_tmp <- 0
  
  
  date_ranges$start_first_occasion <- date_ranges$cam.min + buffer_tmp * 86400 + occasionStartTime_tmp * 3600     #each stations setup  + buffer + starttime
  date_ranges$start_first_occasion_survey <- min(date_ranges$cam.min) + buffer_tmp * 86400 + occasionStartTime_tmp * 3600    # first station's setup  + buffer + starttime

  
  if(day1_tmp %in% c("survey", "station") == FALSE) {
    if(as.Date(day1_tmp, tz = timeZone_tmp) < min(as.Date(date_ranges$cam.min,  tz = timeZone_tmp))) stop(paste("day1 (", day1_tmp, ") is before the first station's setup date (",  min(as.Date(date_ranges$cam.min,  tz = timeZone_tmp)), ")", sep = ""))
    if(as.Date(day1_tmp, tz = timeZone_tmp) > max(as.Date(date_ranges$cam.max,  tz = timeZone_tmp))) stop(paste("day1 (", day1_tmp, ") is after the last station's retrieval date (",  max(as.Date(date_ranges$cam.max,  tz = timeZone_tmp)), ")", sep = ""))
    date_ranges$start_first_occasion <- as.POSIXlt(day1_tmp, tz = timeZone_tmp) + occasionStartTime_tmp * 3600
  }
  
  
  
  # # define when last occasion ends 
  # the old way:
  date_ranges$end_of_retrieval_day <- as.POSIXct(paste(date_ranges$cam.max, "23:59:59"), tz = timeZone_tmp, format = "%Y-%m-%d %H:%M:%S")    # end of retrieval day
  
  # new possible solution:
  # # Option 1: end of retrieval day
  # end_of_retrieval_day <- as.POSIXct(paste(date_ranges$cam.max, "23:59:59"), tz = timeZone_tmp, format = "%Y-%m-%d %H:%M:%S")    # end of retrieval day
  # # Option 2: exactly at retrieval hour (this only makes sense if value in camOp on retrieval day is fraction (if time of day was provided))
  # # if there was a problem on the last day, it will be incorrect (return 0:0:0 if effort = 0)
  # end_of_retrieval_day_hour <- date_ranges$cam.max + dhours(apply(cam.op, MARGIN = 1, function(X){X[max(which(!is.na(X)))] * 24}))
  # 
  # # define end of survey: exact retrieval time (if effort is a fraction of day)
  # if(all(end_of_retrieval_day_hour < end_of_retrieval_day)) {
  #   date_ranges$end_of_retrieval_day <- end_of_retrieval_day_hour
  # } else{
  #   date_ranges$end_of_retrieval_day <- end_of_retrieval_day
  # }
  
  # if maxNumberDays is defined, find which is earlier: start + maxNumberDays or station retrieval?
  if(hasArg(maxNumberDays_tmp)) {
    
    if(day1_tmp %in% c("survey", "station") == FALSE){
      # count maximum number days from the beginning of each station's 1st occasion
      date_ranges$start_first_occasion_plus_maxNumberDays <- date_ranges$start_first_occasion_survey + (maxNumberDays_tmp * 86400) - 1   # -1 second ensures that last occasion does not spill into next day if occasionStartTime = 0
    } else {
      # count maximum number days from the beginning of survey's 1st occasion
      date_ranges$start_first_occasion_plus_maxNumberDays <- date_ranges$start_first_occasion + (maxNumberDays_tmp * 86400) - 1   # -1 second ensures that last occasion does not spill into next day if occasionStartTime = 0
    }
    
    # end of last occasion by staion (either end of retrieval day, or after maximum number of days)
    for(xy in 1:nrow(date_ranges)){
      date_ranges$end_last_occasion[xy] <- min(date_ranges$end_of_retrieval_day[xy], date_ranges$start_first_occasion_plus_maxNumberDays[xy])   # use smaller value
    }
    # assign the attributes: POSIX + time zone (to convert from numeric value back to date/time)
    attributes(date_ranges$end_last_occasion) <- attributes(date_ranges$start_first_occasion)   
  } 
  
  # if maxNumberDays is not defined, occasions end on station retrieval?
  if(!hasArg(maxNumberDays_tmp)) {
    date_ranges$end_last_occasion <- date_ranges$end_of_retrieval_day
  }
  
  return(date_ranges)
  
}




# check camera operation matrix with date range table  ####
adjustCameraOperationMatrix <- function(cam.op,
                                        date_ranges2,
                                        timeZone_tmp,
                                        day1_2
){
  # remove stations where occasions begin after end of last occasion (if buffer argument is too large)
  if(any(date_ranges2$start_first_occasion > date_ranges2$end_last_occasion)){
    remove.these.stations <- which(date_ranges2$start_first_occasion > date_ranges2$end_last_occasion)
    if(length(remove.these.stations) == nrow(date_ranges2)) stop("In all stations, the occasions begin after retrieval. Choose a smaller buffer argument.")
    cam.op [remove.these.stations, ] <- NA
  }
  
  
  ##################################
  # set values before beginning of first occasion NA in camera operation matrix (so effort is 0 before): only relevant if buffer was used
  first_col_to_keep2  <- match(as.character(as.Date(date_ranges2$start_first_occasion, tz = timeZone_tmp)), colnames(cam.op))
  
  
  for(xxx in 1:length(first_col_to_keep2)){
    if(first_col_to_keep2[xxx] > 1){         # if it does not begin on 1st day of camera operation matrix
      cam.op[xxx, seq(1, first_col_to_keep2[xxx]-1)] <- NA
    }
  }
  
  # set values after end of last occasion NA in camera operation matrix (so effort is 0 afterwards)
  last_col_to_keep2  <- match(as.character(as.Date(date_ranges2$end_last_occasion, tz = timeZone_tmp)), colnames(cam.op))
  
  
  for(yyy in 1:length(last_col_to_keep2)){
    if(last_col_to_keep2[yyy]+1 < ncol(cam.op)){   # if it does not end on last day of camera operation matrix
      cam.op[yyy, seq(last_col_to_keep2[yyy]+1, ncol(cam.op))] <- NA
    }
  }
  
  
  ####################################
  # trim camera operation matrix (taking into account buffer, occasionStartTime(?), maxNumberDays)
  # based on data frame   "date_ranges"   computed by function   createDateRangeTable
  
  if(day1_2 == "station") {    # 1st day of each station OR some specified date
    
    cam.tmp.min <- apply(cam.op, MARGIN = 1, function(X){min(which(!is.na(X)))})    # first occasion of each station
    cam.tmp.max <- apply(cam.op, MARGIN = 1, function(X){max(which(!is.na(X)))})    # last occasion of each station
    
    diff.days.tmp <- cam.tmp.max - cam.tmp.min
    
    cam.op2 <- matrix(NA,
                      nrow = nrow(cam.op),
                      ncol = max(diff.days.tmp)+1)
    
    # make all stations begin in 1st column
    for(l in 1:nrow(cam.op)){
      if(is.finite(diff.days.tmp[l])){
        cam.op2[l, 1:(diff.days.tmp[l]+1)] <- as.vector(cam.op[l,cam.tmp.min[l]:cam.tmp.max[l]])
      }
    }
    
    if(day1_2 == "station") {
      colnames(cam.op2) <- paste("day", 1:ncol(cam.op2), sep = "")
    }
    
    rownames(cam.op2) <- rownames(cam.op)
    
    cam.op <- cam.op2
    
    
  } else {
    
    
    # remove all columns of cam.op that were before beginning of 1st occasion
    first_col_to_keep <- match(as.character(min(as.Date(date_ranges2$start_first_occasion, tz = timeZone_tmp))), colnames(cam.op))
    if(!is.na(first_col_to_keep)){
      if(first_col_to_keep != 1){
        cam.op <- cam.op[,-seq(1, (first_col_to_keep-1))]
      }
    }
    
    # remove all columns of cam.op that were after end of last occasion / after retrieval of last camera
    last_col_to_keep  <- match(as.character(max(as.Date(date_ranges2$end_last_occasion, tz = timeZone_tmp))), colnames(cam.op))
    if(!is.na(last_col_to_keep)){
      if(last_col_to_keep != ncol(cam.op)){
        cam.op <- cam.op[,-seq((last_col_to_keep + 1), ncol(cam.op))]
      }
    }
    
  }
  return(cam.op)
}


# check consistency of species record table before creating detection history (remove records outside date range etc)  ####
cleanSubsetSpecies <- function(subset_species2,
                               stationCol2,
                               date_ranges2
){
  
  nrow_subset_species2 <- nrow(subset_species2)
  
  
  # remove records that were taken before beginning of first occasion (because of buffer, occasionStartTime, day1)
  corrected_start_time_by_record <- date_ranges2$start_first_occasion[match(subset_species2[,stationCol2], rownames(date_ranges2))]
  
  remove.these <- which(subset_species2$DateTime2 < corrected_start_time_by_record)
  if(length(remove.these) >= 1){
    
    warning(paste(length(remove.these), 
                  " records (out of ", 
                  nrow_subset_species2, 
                  ") were removed because they were taken before day1 (if a date was specified),  within the buffer period, or before occasionStartTime on the 1st day, e.g.:\n", sep = ""),
            paste(head(subset_species2[remove.these, stationCol2]), 
                       head(subset_species2$DateTime2[remove.these]), 
                       collapse = "\n", 
                       sep = ": "),
            call. = FALSE)
    
    subset_species2 <- subset_species2[-remove.these,]
    if(nrow(subset_species2) == 0) stop("No more records after removing records before survey begin. The detection history would be empty.")
    
    rm(corrected_start_time_by_record, remove.these)
  }
  
  # remove records that were taken after end of last occasion (because of maxNumberDays)
  corrected_end_time_by_record <- date_ranges2$end_last_occasion[match(subset_species2[,stationCol2], rownames(date_ranges2))]
  
  remove.these2 <- which(subset_species2$DateTime2 > corrected_end_time_by_record)
  if(length(remove.these2) >= 1){
    
    warning(paste("\n", 
                  paste(length(remove.these2), 
                        " records (out of ", 
                        nrow_subset_species2, 
                        ") were removed because they were taken after the end of the last occasion, e.g.:", sep = ""),
                  paste(head(subset_species2[remove.these2, stationCol2]), 
                        head(subset_species2$DateTime2[remove.these2]), 
                        collapse = "\n", 
                        sep = ": "), 
                  sep = "\n"), 
            call. = FALSE)
    
    subset_species2 <- subset_species2[-remove.these2,]
    
    if(nrow(subset_species2) == 0) stop("No more records left. The detection history would be empty.")
    rm(corrected_end_time_by_record, remove.these2)
  }
  
  return(subset_species2)
}


# calculate trapping effort matrix by day   ####
calculateTrappingEffort <- function(cam.op,
                                    occasionLength2,
                                    scaleEffort2,
                                    includeEffort2,
                                    minActiveDaysPerOccasion2,
                                    occasionStartTime2){
  
  ######################
  # calculate trapping effort by station and occasion
  
  if(occasionLength2 == 1){
    effort <- cam.op          # if occasionLength2 = 1 day, it is identical
  } else {
    effort <- matrix(NA, nrow = nrow(cam.op), ncol = ceiling(ncol(cam.op) / occasionLength2 ))
    
    index <- 1
    for(m in 1:ncol(effort)){    # for every occasion in the effort matrix
      # index for columns in camera operation matrix to aggregate
      if(index + occasionLength2 <= ncol(cam.op)){
        index.tmp <- index : (index + occasionLength2 - 1)
      } else {
        index.tmp <- index : ncol(cam.op)
      }
      
      # calculate effort as sum of active days per occasion
      effort[, m] <- apply(as.matrix(cam.op[,index.tmp]), MARGIN = 1, FUN = sum, na.rm = TRUE)
      # if full occasion NA in cam.op, make effort NA
      effort[, m] <- ifelse(apply(as.matrix(cam.op[,index.tmp]), MARGIN = 1, FUN = function(X) {sum(is.na(X))}) == length(index.tmp), NA, effort[,m])
      # if full occasion = 0 in cam.op, make effort NA
      effort[, m] <- ifelse(apply(as.matrix(cam.op[,index.tmp]), MARGIN = 1, FUN = function(X) {all(X == 0)}),   NA, effort[,m])
      
      ## if full occasion is smaller than 1 (i.e. all 0 or NA), set effort NA
      ## EDIT: 2020-09-12: This can cause problems if cameraOperation calculated fraction of days.
      ## Also, the cases with 0 and NA are covered above. So I comment it for now.
      #effort[, m] <- ifelse(apply(as.matrix(cam.op[,index.tmp]), MARGIN = 1, FUN = function(X) {all(X < 1)}),   NA, effort[,m])
      
      # set cells in effort matrix NA (according to input arguments)
      # this is later used to adjust the detection/non-detection matrix
      if(includeEffort2 == FALSE){
        if(hasArg(minActiveDaysPerOccasion2)){   # includeEffort = FALSE and minActiveDays is defined
          # if occasion has less active days than minActiveDays, set NA
          effort[, m] <- ifelse(apply(as.matrix(cam.op[,index.tmp]), MARGIN = 1, FUN = function(X) {sum(X, na.rm = TRUE) < minActiveDaysPerOccasion2}), NA, effort[,m])
        } else {                                 # includeEffort = FALSE and minActiveDays not defined
          # if any day NA in cam.op, make  occasion effort NA
          effort[, m] <- ifelse(apply(as.matrix(cam.op[,index.tmp]), MARGIN = 1, FUN = function(X) {any(is.na(X))}), NA, effort[,m])
          # if any day = 0 in cam.op, make occasion effort NA
          effort[, m] <- ifelse(apply(as.matrix(cam.op[,index.tmp]), MARGIN = 1, FUN = function(X) {any(X == 0)}),   NA, effort[,m])
          
          if(length(index.tmp) < occasionLength2){  # if occasion is shorter than occasionLength (i.e. last occasion), set NA.
            effort[, m] <- NA
          }
          
        }
        
        ## includeEffort = TRUE
      } else {    
        # if minActiveDays is defined
        if(hasArg(minActiveDaysPerOccasion2)){   
          # if occasion has less actice days than minActiveDays, set NA
          effort[, m] <- ifelse(apply(as.matrix(cam.op[,index.tmp]), MARGIN = 1, FUN = function(X) {sum(X, na.rm = TRUE) < minActiveDaysPerOccasion2}), NA, effort[,m])
        } else {                                 # includeEffort = TRUE and minActiveDays not defined
          # if all days of occasion NA in cam.op, make  occasion effort NA
          #effort[, m] <- ifelse(apply(as.matrix(cam.op[,index.tmp]), MARGIN = 1, FUN = function(X) {all(is.na(X))}), NA, effort[,m])
          # if all days of occasion = 0 in cam.op, make occasion effort NA
          
        }
      }
      index <- index + occasionLength2
    }
    rm(index, index.tmp)
  }
  
  # scale effort, if required
  if(isTRUE(scaleEffort2)){
    if(occasionLength2 == 1) stop("cannot scale effort if occasionLength is 1", call. = FALSE)
    if(length(table(effort)) == 1) stop(paste("all values of effort are identical (", names(table(effort)), "). Cannot scale effort", sep = ""), call. = FALSE)
    
    scale.eff.tmp <- scale(as.vector(effort))                       # scale effort (as a vector, not matrix)
    scale.eff.tmp.attr <- data.frame(effort.scaled.center = NA,     # prepare empty data frame
                                     effort.scaled.scale = NA)
    scale.eff.tmp.attr$effort.scaled.center[1] <- attr(scale.eff.tmp, which = "scaled:center")   # write scaling parameters to data frame
    scale.eff.tmp.attr$effort.scaled.scale [1] <- attr(scale.eff.tmp, which = "scaled:scale")
    effort <- matrix(scale.eff.tmp, nrow = nrow(effort), ncol = ncol(effort))                    # convert effort vector to matrix again
  }
  
  rownames(effort) <- rownames(cam.op)
  
  # return objects
  if(isTRUE(scaleEffort2)){
    return(list(effort, scale.eff.tmp.attr))
  } else {
    return(list(effort))
  }
}



##########################################################################################################
# for function surveyReport

makeSurveyZip <- function(output,
                          recordTable,
                          CTtable ,
                          speciesCol,
                          stationCol,
                          setupCol,
                          retrievalCol,
                          CTDateFormat,
                          CTHasProblems,
                          Xcol,
                          Ycol,
                          recordDateTimeCol,
                          recordDateTimeFormat,
                          sinkpath,
                          usePackageZip){
  
  wd0 <- getwd()
  on.exit(setwd(wd0))
  
  dir.tmp <- tempdir()
  
  file.sep <- .Platform$file.sep
  
  
  dir.zip <- file.path(dir.tmp, paste("surveyReport_", Sys.Date(), sep = ""))
  dir.zip.short <- paste("surveyReport_", Sys.Date(), sep = "")
  unlink(dir.zip, recursive = TRUE)
  dir.create(dir.zip, showWarnings = FALSE, recursive = TRUE)
  
  # create directories
  invisible(sapply(file.path(dir.zip, c("surveyReport", "activity", "scripts", "detectionMaps")), dir.create, showWarnings = FALSE))
  
  ######
  # save input tables
  
  write.csv(recordTable, file = file.path(dir.zip, "recordTable.csv"), row.names = FALSE)
  write.csv(CTtable, file = file.path(dir.zip, "CTtable.csv"), row.names = FALSE)
  
  ######
  # save surveyReport tables
  
  dir.tmp2 <- file.path(dir.zip, "surveyReport")
  
  for(xyz in 1:length(output)){
    write.csv(output[[xyz]],
              file = file.path(dir.tmp2, paste(names(output)[[xyz]], ".csv", sep = "")),
              row.names = FALSE)
  }
  rm(xyz)
  
  ######
  # make activity plots
  
  activityDensity(recordTable          = recordTable,
                  allSpecies           = TRUE,
                  speciesCol           = speciesCol,
                  recordDateTimeCol    = recordDateTimeCol,
                  recordDateTimeFormat = recordDateTimeFormat,
                  plotR                = FALSE,
                  writePNG             = TRUE,
                  plotDirectory        = file.path(dir.zip, "activity"))
  
  ######
  # make detection maps
  
  if(hasArg(Xcol) & hasArg(Ycol)){
    detectionMaps(CTtable       = CTtable,
                  recordTable   = recordTable,
                  Xcol          = Xcol,
                  Ycol          = Ycol,
                  stationCol    = stationCol,
                  speciesCol    = speciesCol,
                  richnessPlot  = TRUE,
                  speciesPlots  = TRUE,
                  printLabels   = TRUE,
                  plotR         = FALSE,
                  writePNG      = TRUE,
                  plotDirectory = file.path(dir.zip, "detectionMaps"),
                  pngMaxPix     = 1000
    )
  }
  
  ########################################################################################
  # prepare scripts
  
  scriptfile <- file.path(dir.zip, "scripts", "camtrapR_scripts.R")
  file.create(scriptfile, showWarnings = FALSE)
  
  # load basic data
  
  sink(file = scriptfile)
  cat("###  load data tables  ### \n\n")
  
  cat("directory.data <- PLEASE_SPECIFY        # this is the directory you got after unzipped the zip file to (e.g. .../surveyReport_2016-02-29/)  \n\n")
  
  
  cat("CTtable     <- read.csv(paste(directory.data, 'CTtable.csv', sep = '/'))\n")
  cat("recordTable <- read.csv(paste(directory.data, 'recordTable.csv', sep = '/'))\n\n\n")
  
  sink()
  
  
  # make detection maps   # no, because of coordinate columns
  
  sink(file = scriptfile, append = TRUE)
  cat("###  plot species detections  ### \n\n")
  if(hasArg(Xcol) & hasArg(Ycol)){
    cat(paste("Xcol <- '", Xcol, "'\n", sep = ""))
    cat(paste("Ycol <- '", Ycol, "'\n\n", sep = ""))
  } else {
    cat("Xcol <- PLEASE_SPECIFY\n")
    cat("Ycol <- PLEASE_SPECIFY\n\n")
  }
  
  cat(paste("detections <- detectionMaps(CTtable = CTtable,
            recordTable  = recordTable,
            Xcol         = Xcol,
            Ycol         = Ycol,
            stationCol   = '", stationCol, "',
            speciesCol   = '", speciesCol, "',
            writePNG     = FALSE,
            plotR        = TRUE,
            printLabels  = TRUE,
            richnessPlot = TRUE,
            addLegend    = TRUE
  ) \n\n\n", sep = ""))

  sink()
  
  
  # camera operation matrix
  
  sink(file = scriptfile, append = TRUE)
  cat("###  camera operation matrix  ### \n\n")
  
  cat(paste("camOp <- cameraOperation(CTtable = CTtable,
            stationCol                        = '", stationCol, "',
            #cameraCol,
            setupCol                          = '", setupCol, "',
            retrievalCol                      = '", retrievalCol, "',
            hasProblems                       = '", CTHasProblems, "',
            #byCamera,
            #allCamsOn,
            #camerasIndependent,
            dateFormat                        = '", CTDateFormat, "' #,
            #writecsv                         = FALSE,
            #outDir
  ) \n\n\n", sep = ""))

  sink()
  
  
  # make detection histories
  
  sink(file = scriptfile, append = TRUE)
  cat("###  detection histories  ### \n\n")
  
  cat("day1              <- PLEASE_SPECIFY \n")
  cat("occasionLength    <- PLEASE_SPECIFY\n")
  cat("speciesOfInterest <- PLEASE_SPECIFY\n")
  cat("timeZone          <- PLEASE_SPECIFY \n\n")
  
  cat(paste("detHist <- detectionHistory(recordTable = recordTable,
            species                                  = speciesOfInterest,
            camOp                                    = cameraOperation,
            stationCol                               = '", stationCol, "',
            speciesCol                               = '", speciesCol, "',
            recordDateTimeCol                        = '", recordDateTimeCol, "',
            recordDateTimeFormat                     = '", recordDateTimeFormat, "',
            occasionLength                           = occasionLength,
            #maxNumberDays,
            day1                                     = day1,
            #buffer                                  = 0,
            includeEffort                            = TRUE,
            scaleEffort                              = FALSE,
            datesAsOccasionNames                     = FALSE,
            timeZone                                 = timeZone,
            writecsv                                 = FALSE #,
            # outDir
  ) \n\n\n", sep = ""))

  sink()
  
  
  ### write description text file ###
  
  readmefile <- file.path(dir.zip, "readme.txt")
  file.create(readmefile, showWarnings = FALSE)
  
  sink(file = readmefile)
  
  
  cat(paste("this zip file contains a summary of a camera trapping survey:\n\n"))
  cat(paste("total survey period:              ", min(output$survey_dates$setup_date), "-", max(output$survey_dates$retrieval_date), "\n"))
  cat(paste("Total number of stations:         ", nrow(output$survey_dates), "\n"))
  cat(paste("Number of operational stations:   ", length(which(output$survey_dates$n_nights_active >= 1)), "\n"))
  cat(paste("Total number of cameras:          ", sum(output$survey_dates$n_cameras), "\n"))
  cat(paste("Total number of active trap days: ", sum(output$survey_dates$n_nights_active), "\n\n"))
  
  cat("\n-------------------------------------------------------\n\n")
  
  cat(paste("the following table shows a summary of survey period for each station \n\n"))
  print(output$survey_dates, row.names = FALSE)
  cat("\n-------------------------------------------------------\n\n")
  
  cat("legend to the data structure in the zip file:\n\n")
  cat(".../activity/              plots of activity density estimations for each species created with activityDensity\n")
  if(hasArg(Xcol) & hasArg(Ycol)){cat(".../detectionMaps/         maps of observed species richness and numbers of species records\n")}
  cat(".../scripts/               a prepared R script for occupancy analyses\n")
  cat(".../surveyReport/          the tables created by surveyReport summarising the survey\n")
  cat(".../CTtable.csv            table of camera trap station IDs, operation times and coordinates\n")
  cat(".../recordTable.csv        table of species records\n")
  cat(".../readme.txt             this file\n\n")
  
  cat("\n-------------------------------------------------------\n\n")
  
  cat(paste("species images are located in:\n\n"))
  cat(paste(as.character(recordTable$Directory)[1], "          # the first record\n"))
  cat(paste(as.character(recordTable$Directory)[nrow(recordTable)], "          # the last record\n"))
  
  cat("\n-------------------------------------------------------\n\n")
  
  cat("information about who created this report:\n\n")
  print(as.data.frame(Sys.info()))
  
  cat(paste("\n\n\n\n ***   report created by function surveyReport on",  Sys.time(),  "   ***\n\n\n"))
  
  
  sink()
  
  ######
  # make final zip file
  
  # list files
  files2zip <- dir(path = dir.zip, full.names = FALSE, recursive = TRUE)
  files2zip <- file.path(dir.zip.short, files2zip)
  
  # write zip
  setwd(dir.tmp)
  
  if(isFALSE(usePackageZip)) {
  zip(zipfile = file.path(sinkpath,
                                           paste(dir.zip.short, ".zip", sep = "")),
                       files   = files2zip,
                       flags   = "")
  }
  
  if(isTRUE(usePackageZip)) {
    zip::zipr(zipfile = file.path(sinkpath,
                                  paste(dir.zip.short, ".zip", sep = "")),
              files   = files2zip)
  }
  
  # check if output was created
  if(file.exists(file.path(sinkpath,
                           paste(dir.zip.short, ".zip", sep = "")))){
  message("zip file compiled \n",
      paste(sinkpath, paste(dir.zip.short, ".zip\n\n", sep = ""), sep = file.sep))
  } else {
    message("zip file creation failed")
  }
}



##########################################################################################################
# assign session IDs to records in recordTableIndividual based on a session column in camera trap table    ####
# This is for when there are several entries for each station, one for each session
# in other words, if stations were operated continuously and session start/end dates are defined in camera trap table

assignSessionIDtoRecordTableIndividual <- function(recordTableIndividual_tmp,
                                                   cameraTrapTable_tmp,
                                                   stationCol,
                                                   sessionCol,
                                                   setup_date_col,
                                                   retrieval_date_col){
  
  # check input
  if(sessionCol %in% colnames(recordTableIndividual_tmp)) stop("recordTableIndividual has a session column already. Cannot assign new session ID",
                                                               call. = FALSE)
  if(!setup_date_col %in% colnames(cameraTrapTable_tmp)) stop("CTtable setup column not found",
                                                              call. = FALSE)
  if(!retrieval_date_col %in% colnames(cameraTrapTable_tmp)) stop("CTtable retrieval column not found",
                                                                  call. = FALSE)
  
  # define "between" function
  `%between%` <- function(x, interval) x >= interval[1] & x <= interval[2]
  
  # add an empty session column to the record table
  recordTableIndividual_tmp[, sessionCol] <- NA
  
  # for each station, find the respective sessions and assign the session IDs to the records based on their date.
  for(i in 1:length(unique(cameraTrapTable_tmp[,stationCol]))){
    
    # find index of records in station i and respective dates
    record_rows_id_tmp <- which(recordTableIndividual_tmp[,stationCol] == unique(cameraTrapTable_tmp[,stationCol])[i])
    rec_dates_tmp <- recordTableIndividual_tmp$Date [record_rows_id_tmp]
    
    # NOTE: output may be assigned to wrong session if occasionStartTime != 0!!!
    # There is also an issue when station ID is a combination of station and session IDs.
    
    # extract session start/end dates (by session)
    session_id    <- cameraTrapTable_tmp[, sessionCol]         [cameraTrapTable_tmp[,stationCol] == unique(cameraTrapTable_tmp[,stationCol])[i]]
    session_start <- cameraTrapTable_tmp[, setup_date_col]     [cameraTrapTable_tmp[,stationCol] == unique(cameraTrapTable_tmp[,stationCol])[i]]
    session_end   <- cameraTrapTable_tmp[, retrieval_date_col] [cameraTrapTable_tmp[,stationCol] == unique(cameraTrapTable_tmp[,stationCol])[i]]
    
    # assign session ID to recordTable
    for(session_id_tmp in session_id){
      record_rows_session_id_tmp <- which(rec_dates_tmp %between% c(session_start[session_id_tmp], session_end[session_id_tmp]))
      recordTableIndividual_tmp[record_rows_id_tmp, sessionCol]  [record_rows_session_id_tmp] <- session_id_tmp
    }
    rm(session_id, session_start, session_end, record_rows_id_tmp, rec_dates_tmp)
  }
  
  # create new station columns based on station + session (and create backup first)
  
  recordTableIndividual_tmp$station_backup <- recordTableIndividual_tmp[, stationCol]
  cameraTrapTable_tmp$station_backup       <- cameraTrapTable_tmp[, stationCol]
  
  recordTableIndividual_tmp[, stationCol] <- paste(recordTableIndividual_tmp[, stationCol], recordTableIndividual_tmp[, sessionCol], sep = "_session")
  cameraTrapTable_tmp[, stationCol]       <- paste(cameraTrapTable_tmp[, stationCol], cameraTrapTable_tmp[, sessionCol], sep = "_session")
  
  
  return(list(recordTable = recordTableIndividual_tmp,
              CTtable     = cameraTrapTable_tmp))
}



assignSessionIDtoRecordTable <- function(recordTable_tmp,
                                         camOp,
                                         dateTimeCol,
                                         stationCol,
                                         sessionCol
){
  
  
  camop.info.df <- deparseCamOpRownames(camOp)
  
  # if there's records at stations that are not in the camop row names
  if(!any(recordTable_tmp[, stationCol] %in% unique(camop.info.df$station))) {
    stop("Stations in record table station column don't match stations in row names of camera operation matrix.")
  }
    
  if(!all(recordTable_tmp[, stationCol] %in% unique(camop.info.df$station))) {
    warning(paste("Some stations in the record table are not matched by stations in camOp row names:\n",
                  paste(unique(recordTable_tmp[!recordTable_tmp[, stationCol] %in% unique(camop.info.df$station), stationCol]), collapse = ", ")))
  }
  
  recordTable_tmp[, sessionCol] <- NA
  
  record_dates_tmp <- as.Date(recordTable_tmp[, dateTimeCol])
  if(all(is.na(record_dates_tmp))) stop("Could not interpret record date/time. Output is all NA.")
  
  record_dates_backup <- record_dates_tmp
  camOp_dates_tmp  <- as.Date(colnames(camOp))
  
  # remove records before / after camOp date range
  records_too_early <- which(sapply(record_dates_tmp, FUN = function(x) x < min(camOp_dates_tmp)))
  
  if(length(records_too_early) > 0) {
    recordTable_tmp <- recordTable_tmp[-records_too_early, ]
    warning(paste(length(records_too_early), "records were removed because they were before the first day in the camera operation matrix"), call. = FALSE)
    record_dates_tmp <- as.Date(recordTable_tmp[, dateTimeCol])
  }
  
  records_too_late  <- which(sapply(record_dates_tmp, FUN = function(x) x > max(camOp_dates_tmp)))
  
  if(length(records_too_late) > 0) {
    recordTable_tmp <- recordTable_tmp[- records_too_late, ]
    warning(paste(length(records_too_late), "records were removed because they were after the last day in the camera operation matrix"), call. = FALSE)
    record_dates_tmp <- as.Date(recordTable_tmp[, dateTimeCol])
  }
  
  if(nrow(recordTable_tmp) == 0) stop(paste0("Date range mismatch between records (", paste(range(record_dates_backup), collapse = " - "),
                                            ") and camera operation matrix (", paste(range(camOp_dates_tmp), collapse = " - "), ")"))
  
  for(i in 1:nrow(camop.info.df)){
    # for every row in camOp, find all records at that station  
    which_tmp <- which(recordTable_tmp[, stationCol] == camop.info.df$station[i])
    
    if(length(which_tmp) >= 1){
      # of the records at that station, which are not NA in that row of the camOp (there are different rows for different sessions)
      which_tmp2 <- which(!is.na(camOp[i, as.character(record_dates_tmp[which_tmp])]))
      
      recordTable_tmp[which_tmp[which_tmp2], sessionCol] <- camop.info.df$session[i]
    }
  }
  
  which_na <- which(is.na(recordTable_tmp[, sessionCol]))
  
  if(length(which_na) > 0) {
    recordTable_tmp_na <- recordTable_tmp[which_na, ]
    recordTable_tmp <- recordTable_tmp[-which_na, ]
    if(length(which_na) == nrow(recordTable_tmp)) {
      stop("Could not assign session to any record. Please check format of column names of camOp and station column of record table")
    } else {
      warning(paste0(length(which_na), " records were removed because they could not be assigned to an active station / session:\n",
                    paste(recordTable_tmp_na[, stationCol], recordTable_tmp_na[, dateTimeCol], sep = ": ", collapse = "\n")), call. = FALSE)
    }
  }
  
  separatorSession <- "__SESS_"
  stationCol_backup <- paste(stationCol, "backup", sep = "_")
  
  recordTable_tmp[, stationCol_backup] <- recordTable_tmp[, stationCol]
  
  recordTable_tmp[, stationCol] <- paste(recordTable_tmp[, stationCol], recordTable_tmp[, sessionCol], sep = separatorSession)
  
  return(recordTable_tmp)
}



# checks if input is data.frame or tibble. If tibble, either convert to data.frame or stop  ####

dataFrameTibbleCheck <- function(df, 
                                 tibble_allowed = TRUE, 
                                 data_table_allowed = TRUE){
  
  # check if it is a data.frame at all
  if(!is.data.frame(df)) stop(paste(substitute(df), "must be a data.frame"), call. = FALSE)
  
  # handling tibbles (tidyverse)
  if (requireNamespace("tibble", quietly = TRUE)) {
    
    if(tibble::is_tibble(df)) {
      if(tibble_allowed) {
        message (paste(substitute(df), "was converted from tibble to data.frame"))
        df <- as.data.frame(df)
      } else {
        stop (paste(substitute(df), "is a tibble. Please provide a data.frame instead (use read.csv() or as.data.frame())"), call. = FALSE)
      }
    }
  }
  
  # handling data.tables
  
  if (requireNamespace("data.table", quietly = TRUE)) {
    
    if(data.table::is.data.table(df)) {
      if(data_table_allowed) {
        message (paste(substitute(df), "was converted from data.table to data.frame"))
        #df <- as.data.frame(df)
        df <- setDF(df)
      } else {
        stop (paste(substitute(df), "is a data.table Please provide a data.frame instead (use read.csv() or as.data.frame())"), call. = FALSE)
      }
    }
  }
  return(df)
}


# check and convert dates (character) to date objects, either with base functions or lubridate    ####

# CHECK: timeZone argument needed in surveyReport?

parseDateObject <- function(inputColumn,     
                            dateFormat,   
                            checkNA,      # throw error if there are NAs in input (only setup / retrieval, not problems)
                            checkEmpty,    # throw error if there are blank values in input  (only setup / retrieval, not problems)
                            allowEmptyOutput = FALSE,
                            returndatetime = FALSE
){
  
  #if(!class(inputColumn) %in% c("factor", "character")) stop(paste("date column must be a factor or character:", deparse(substitute(inputColumn))), call. = FALSE)
  
  if(checkNA & any(is.na(inputColumn)))   stop(paste("there are NAs in", deparse(substitute(inputColumn))), call. = FALSE)
  if(checkEmpty & any(inputColumn == "")) stop(paste("there are blank values in", deparse(substitute(inputColumn))), call. = FALSE)
  
  if(all(inputColumn == "") & allowEmptyOutput) return(NA)
  
  inputColumn.char <- as.character(inputColumn)
  
  # option 1: base functions for dates as per strptime (identified by "%")
  if(grepl(pattern = "%", x = dateFormat, fixed = TRUE)){
    if(any(grepl(pattern = " ", inputColumn.char))) warning(paste0("There are spaces in ", deparse(substitute(inputColumn)), ", but not in dateFormat\n"), call. = FALSE)
    out <- as.Date(inputColumn.char,     format = dateFormat)
  } else {
  # option 2: lubridate functions (identified by absence of "%")
    if(!requireNamespace("lubridate", quietly = TRUE)) stop(paste("package 'lubridate' is required for the specified dateFormat", dateFormat))
    
    out <- lubridate::date(lubridate::parse_date_time(inputColumn.char, orders = dateFormat))
  }
  
  if(all(is.na(out))) stop(paste0("Cannot read date format in ", deparse(substitute(inputColumn)), ". Output is all NA.\n",
                                  "expected:  ", dateFormat, "\nactual:    ", inputColumn[1]), call. = FALSE)
  
  if(checkNA & any(is.na(out))) stop(paste("At least one entry in", deparse(substitute(inputColumn)), "cannot be interpreted using dateFormat:", dateFormat, "\n",
                                           "rows", paste(which(is.na(out)), collapse = ", ")), call. = FALSE)
  if(isTRUE(returndatetime))  return(as_datetime(out, tz = "UTC"))
  if(isFALSE(returndatetime)) return(out)
}


# check and convert date - time (character) to datetime objects (POSIXlt), either with base functions or lubridate    ####

parseDateTimeObject <- function(inputColumn,   
                                dateTimeFormat,
                                timeZone,
                                checkNA = TRUE,      # throw error if there are NAs in input (only setup / retrieval, not problems)
                                checkEmpty = TRUE,   # throw error if there are blank values in input  (only setup / retrieval, not problems)
                                checkNA_out = TRUE,  # throw error when there is NAs in output (FALSE so reporting is done by detectionHistory, which returns correct row numbers)
                                allowEmptyOutput = FALSE,  
                                quiet = FALSE
){
  
  
  if("POSIXct" %in% class(inputColumn)){
    message(paste("datetime column is in POSIXct format. Converting to character:", deparse(substitute(inputColumn)), ""), call. = FALSE)
    inputColumn <- as.character(inputColumn)
  } else {
    if(!class(inputColumn) %in% c("factor", "character")) stop(paste("datetime column must be a factor or character:", deparse(substitute(inputColumn))), call. = FALSE)
  }
  
  if(checkNA & any(is.na(inputColumn)))   stop(paste("there are NAs in", deparse(substitute(inputColumn))), call. = FALSE)
  if(checkEmpty & any(inputColumn == "")) stop(paste("there are blank values in", deparse(substitute(inputColumn))), call. = FALSE)
  
  if(all(inputColumn == "") & allowEmptyOutput) return(NA)
  
  
  inputColumn.char <- as.character(inputColumn)
  
  # option 1: base functions for dates as per strptime (identified by "%")
  if(grepl(pattern = "%", x = dateTimeFormat, fixed = TRUE)){
    out <- as.POSIXlt(inputColumn.char, tz = timeZone, format = dateTimeFormat)
  } else {
    # option 2: lubridate functions (identified by absence of "%")
    if(!requireNamespace("lubridate", quietly = TRUE)) stop(paste("package 'lubridate' is required for the specified dateTimeFormat", dateTimeFormat))
    
    out <- lubridate::parse_date_time(inputColumn.char, orders = dateTimeFormat, tz = timeZone, quiet = quiet)
  }
  
  if(all(is.na(out))) stop(paste0("Cannot read datetime format in ", deparse(substitute(inputColumn)), ". Output is all NA.\n",
  "expected:  ", dateTimeFormat, "\nactual:    ", inputColumn[1]), call. = FALSE)
  
  if(checkNA_out & any(is.na(out))) stop(paste(sum(is.na(out)), "out of", length(out), "records in",
                                               deparse(substitute(inputColumn)), "cannot be interpreted using dateTimeFormat:", dateTimeFormat, "\n",
                                               "rows", paste(which(is.na(out)), collapse = ", ")), call. = FALSE)
  
  
  if(!any(class(out) %in% c("POSIXct", "POSIXlt"))) stop("couldn't interpret recordDateTimeCol of recordTable using specified recordDateTimeFormat. Output is not POSIX object")
  return(out)
}

## make a new empty matrix, a row for each unique station / camera combination
stationSessionCamMatrix <- function(CTtable,
                                    stationCol,
                                    cameraCol, 
                                    sessionCol, 
                                    setupCol,
                                    retrievalCol
){
  
  separatorCam     <- "__CAM_"
  separatorSession <- "__SESS_"
  
  double_underscore <- "__"
  
  if(length(grep(pattern = double_underscore, x = CTtable[,stationCol])) >= 1) stop(paste("Station IDs may not contain double underscores", double_underscore), call. = FALSE)
  
  
  # convert setup and retrievalCol to Dates (in case they contain times)
  CTtable[,setupCol]     <- as.Date(as.character(CTtable[,setupCol]))
  CTtable[,retrievalCol] <- as.Date(as.character(CTtable[,retrievalCol]))
  
  
  if(hasArg(sessionCol)){
    if(any(CTtable[,sessionCol] == "")) stop("there are empty cells in sessionCol Please provide camera IDs for all cameras",
                                             call. = FALSE)    
    if(any(is.na(CTtable[,sessionCol]))) stop("there are NAs in sessionCol Please provide camera IDs for all cameras",
                                              call. = FALSE)    
    if(length(grep(pattern = double_underscore, x = CTtable[,sessionCol])) >= 1)  stop(paste("Session IDs may not contain double underscores", double_underscore),  call. = FALSE)
    
    stationsession <- paste(CTtable[,stationCol], CTtable[,sessionCol], sep = separatorSession)
    m <- matrix(ncol = abs(as.integer(max(CTtable[,retrievalCol]) - min(CTtable[,setupCol]))) + 1,
                nrow  = length(stationsession))
    colnames(m) <- as.character(as.Date(min(CTtable[,setupCol]):max(CTtable[,retrievalCol]), origin = "1970-01-01"))
    rownames(m) <- stationsession
  }
  
  if(hasArg(cameraCol)){
    if(any(CTtable[,cameraCol] == "")) stop("there are empty cells in cameraCol. Please provide camera IDs for all cameras",
                                            call. = FALSE)    
    if(any(is.na(CTtable[,cameraCol]))) stop("there are NAs in cameraCol. Please provide camera IDs for all cameras",
                                             call. = FALSE)    
    if(length(grep(pattern = double_underscore, x = CTtable[,cameraCol])) >= 1)  stop(paste("Camera IDs may not contain double underscores", double_underscore),  call. = FALSE)
    
    
    stationcam <- paste(CTtable[,stationCol], CTtable[,cameraCol], sep = separatorCam)
    m <- matrix(ncol = abs(as.integer(max(CTtable[,retrievalCol]) - min(CTtable[,setupCol]))) + 1,
                nrow  = length(stationcam))
    colnames(m) <- as.character(as.Date(min(CTtable[,setupCol]):max(CTtable[,retrievalCol]), origin = "1970-01-01"))
    rownames(m) <- stationcam
  }
  
  if(hasArg(sessionCol) & hasArg(cameraCol)){
    
    stationsessioncam <- paste(CTtable[,stationCol], separatorSession, CTtable[,sessionCol], separatorCam, CTtable[,cameraCol], sep = "")
    m <- matrix(ncol = abs(as.integer(max(CTtable[,retrievalCol]) - min(CTtable[,setupCol]))) + 1,
                nrow  = length(stationsessioncam))
    colnames(m) <- as.character(as.Date(min(CTtable[,setupCol]):max(CTtable[,retrievalCol]), origin = "1970-01-01"))
    rownames(m) <- stationsessioncam
  }
  
  if(!hasArg(sessionCol) & !hasArg(cameraCol)){
    
    m <- matrix(ncol = abs(as.integer(max(CTtable[,retrievalCol]) - min(CTtable[,setupCol]))) + 1,
                nrow  = length(CTtable[, stationCol]))
    colnames(m) <- as.character(as.Date(min(CTtable[,setupCol]):max(CTtable[,retrievalCol]), origin = "1970-01-01"))
    rownames(m) <- CTtable[, stationCol]
  }

  return(m)
}

# analyse row names of camera operation matrix to extract station, camera and session information (the latter two only if applicable)
deparseCamOpRownames <- function(camOp){
  
  separatorCam <- "__CAM_"
  separatorSession <- "__SESS_"
  
  if(is.data.frame(camOp)) camOp <- as.matrix(camOp)
  
  # extract rownames, if it is a matrix (if not I assume it is a vector, the result of rownames(camOp))
  if(is.matrix(camOp)){
    x <- rownames(camOp)
  } else {
    x <- camOp
  }
  
  if(any(grepl(separatorSession, x)) &
     any(grepl(separatorCam, x))){
    x2 <- strsplit(x, split = separatorSession)
    
    stationIDs <- sapply(x2, FUN = function(x)x[1])
    
    sessionCameraIDs <- sapply(x2, FUN = function(x)x[2])
    
    x3 <- strsplit(sessionCameraIDs, split = separatorCam)
    
    sessionIDs <- sapply(x3, FUN = function(x)x[1])
    cameraIDs  <- sapply(x3, FUN = function(x)x[2])
    
    return(data.frame(station = stationIDs,
                      session = sessionIDs,
                      camera = cameraIDs, 
                      stringsAsFactors = FALSE))
  }
  
  if(any(grepl(separatorSession, x))){
    x2 <- strsplit(x, split = separatorSession)
    
    stationIDs <- sapply(x2, FUN = function(x)x[1])
    sessionIDs <- sapply(x2, FUN = function(x)x[2])
    
    return(data.frame(station = stationIDs,
                      session = sessionIDs, 
                      stringsAsFactors = FALSE))
  }
  
  if(any(grepl(separatorCam, x))){
    x2 <- strsplit(x, split = separatorCam)
    
    stationIDs <- sapply(x2, FUN = function(x)x[1])
    cameraIDs  <- sapply(x2, FUN = function(x)x[2])
    
    return(data.frame(station = stationIDs,
                      camera = cameraIDs, 
                      stringsAsFactors = FALSE))
  }
  return(data.frame(station = x, 
                    stringsAsFactors = FALSE))
}



# add NA columns to matrix to achieve a certain number of columns (for creating multi-seasons unmarked frames)
padMatrixWithNA <- function(mat, ncol_desired){
  
  if(ncol_desired < ncol(mat)){
    mat.out <- matrix(c(mat, 
                        rep(NA, times = nrow(mat) * (ncol_desired - ncol(mat)))),
                      ncol = ncol_desired, 
                      nrow = nrow(mat)
    )  
    rownames(mat.out) <- rownames(mat)
    colnames(mat.out) <- colnames(mat)
    return(mat.out)
  } else {
    return(mat)
  }
}

# for progress indicator in user messages (28 charcters wide in total)
makeProgressbar <- function(current, 
                            total){
  progress_bar_width <- 20
  perc <- current / total
  
  pb <- paste("      |", 
              paste(rep("=", times = round(perc * progress_bar_width)), collapse = ""), 
              paste(rep(" ", times = progress_bar_width - round(perc * progress_bar_width)), collapse = ""), 
              "|", 
              "  ", formatC(round(perc * 100), width = 3), "%", sep = "")
}


# access digiKam database and provide tables to extract species tags for videos
# call before exiftool

accessDigiKamDatabase <- function(db_directory,   # database directory 
                                  db_filename     # database filename
)
{
  # ensure database directory and file exist
  if(!dir.exists(db_directory)) stop("Could not find directory 'db_directory'", call. = FALSE)
  if(!file.exists(file.path(db_directory, db_filename))) stop("Could not find db_filename in db_directory", call. = FALSE)
  
  # establish database connection
  con <- RSQLite::dbConnect(RSQLite::SQLite(), file.path(db_directory, db_filename))
  
  # read tables
  Images      <- RSQLite::dbReadTable(con, 'Images')
  Tags        <- RSQLite::dbReadTable(con, 'Tags')
  ImageTags   <- RSQLite::dbReadTable(con, 'ImageTags')
  Albums      <- RSQLite::dbReadTable(con, 'Albums')
  AlbumRoots  <- RSQLite::dbReadTable(con, 'AlbumRoots')
  
#  ImageInformation  <- RSQLite::dbReadTable(con, 'ImageInformation')
#  ImageMetadata     <- RSQLite::dbReadTable(con, 'ImageMetadata')
  
  # disconnect database  
  RSQLite::dbDisconnect(con)
  
  # make output
  return(list(Albums           = Albums,
              AlbumRoots       = AlbumRoots,
              Images           = Images, 
              Tags             = Tags,
              ImageTags        = ImageTags#,
#              ImageInformation = ImageInformation,
#              ImageMetadata    = ImageMetadata
))
}



# extract species tags of videos from digiKam database tables

digiKamVideoHierarchicalSubject <- function(stationDir,
                                    digiKamTablesList,    # output of accessDigiKamDatabase
                                    videoFormat           # character vector of desired video formats
)
{
  
  Albums           <- digiKamTablesList$Albums
  AlbumRoots       <- digiKamTablesList$AlbumRoots
  Images           <- digiKamTablesList$Images
  Tags             <- digiKamTablesList$Tags
  ImageTags        <- digiKamTablesList$ImageTags

  
  # add platform file separator to stationDir
  stationDir0 <- stationDir
  stationDir <- paste0(stationDir, .Platform$file.sep)
  
  # combine album root and album path (match by albumRoot id, not index position)
  Albums <- merge(Albums, AlbumRoots, by.x = "albumRoot", by.y = "id", sort = FALSE)
  Albums$albumPath_full <- paste0(Albums$specificPath, Albums$relativePath)
  
  # add drive letter (only relevant on Windows, and can potentially be wrong if there's Album roots on different drives)
  # also not sure if this works on Mac / Linux due to missing drive letters
  if(.Platform$OS.type == "windows"){
    Albums$albumPath_full2 <- paste(substr(stationDir, 1,2),   # the Drive letter, digiKam doesn't return it
                                    Albums$albumPath_full,
                                    sep = "")
  }
  
  # Linux/Mac solution?
  if(.Platform$OS.type == "unix"){
    Albums$albumPath_full2 <- Albums$albumPath_full#paste(substr(stationDir, 1,2),
                                    #Albums$albumPath_full,
                                    #sep = "")
  }
  
  
  
  # add "/" to ensure Station1 doesn't include Station10 also. Also ensure all folders end with one / only
  Albums$albumPath_full2 <- ifelse(endsWith(Albums$albumPath_full2, .Platform$file.sep),
                                   Albums$albumPath_full2,
                                   paste0(Albums$albumPath_full2, .Platform$file.sep)) 
  
  pathColumn <- "albumPath_full2"
  
  # see if stationDir exists in database
  if(!stationDir %in% Albums[, pathColumn]){
    stop(paste("station directory", stationDir,  "was not found in digiKam albums. Skipping"), call. = FALSE)
    # try to handle with a warning instead
    # warning(paste("station directory", stationDir,  "was not found in digiKam albums. Skipping"), call. = FALSE, immediate. = T)
    # next
  }
  
  # find current station in albums
  #album_of_interest <-Albums [which(Albums[, pathColumn] == stationDir),]   # only return the station directory, not camera subdirectories
  album_of_interest <- Albums [grep(pattern = stationDir, Albums[, pathColumn]),]   # This one returns Station directory and camera subdirectories
  if(nrow(album_of_interest) == 0) {
    warning("Could not locate album for ", stationDir, ". Skipping", call. = FALSE)   # NOTE TO SELF: DOESN'T SKIP OR BREAK. CHANGE?
  }
  
  # keep only images in the current album
  image_subset <- Images[Images$album %in% album_of_interest$id,]  # returns matches for all directories. Also, no NAs apparently
  
  # add stationDirectory 
  image_subset$stationDir <- stationDir0
  
  # to do: handle situation where there's no images in image_subset
  
  # NAs are possible, so remove them
  if(any(is.na(image_subset$id))) {
    image_subset <- image_subset[!is.na(image_subset$id),]
  }
  
  # keep only desired video files
  image_subset2 <- image_subset[tolower(substr(image_subset$name, 
                                               nchar(image_subset$name) - 3, 
                                               nchar(image_subset$name))) %in% 
                                  paste(".", videoFormat, sep = ""),]
  
  image_subset_others <- image_subset[!tolower(substr(image_subset$name, 
                                               nchar(image_subset$name) - 3, 
                                               nchar(image_subset$name))) %in% 
                                  paste(".", videoFormat, sep = ""),]
  
  #warning if no videos found
  if(nrow(image_subset2) == 0) {
    warning("Could not find any ", paste(videoFormat, collapse = "/"), " files in ", stationDir, call. = FALSE)
  }
  
  # find "Species" tag group and its children
   
   # subset image tags
   ImageTags <- ImageTags[ImageTags$tagid %in% Tags$id,]
   
   # get proper labels for image tags (and their parent labels = tag group names)
   ImageTags$cleartext_child  <- Tags$name [match(ImageTags$tagid, Tags$id)]
   
   Tags$parent_name <- Tags$name[match(Tags$pid, Tags$id)]
   ImageTags$cleartext_parent <- Tags$parent_name[match(ImageTags$tagid, Tags$id)]    #alternative to above, seems to work (and above seems wrong suddenly)
   
   
   # # # solution by Joel Ruprecht (Google group 2020-07-21) - if a station has no tags with parent ID, doesn't seem to work yet, but should be identical to above solution
   # parentNA <- which(is.na(Tags$name.parent))
   # Tags$name.parent[parentNA] <- Tags$name[parentNA]
   # ImageTags$cleartext_parent_Joel <- Tags$name.parent[match(ImageTags$tagid, Tags$id)]
   
   
   
   # combine parent and child to create HierarchicalTags
   ImageTags$cleartext_full <- paste(ImageTags$cleartext_parent, ImageTags$cleartext_child, sep = "|")
   
   
   # remove unnecessary (internal) tags (not essential)
    remove1 <- grep(Tags$name, pattern = "_Digikam_Internal_Tags_")
    remove2 <- grep(Tags$name, pattern = "Color Label ")
    remove3 <- grep(Tags$name, pattern = "Pick Label ")

    Tags <- Tags[!Tags$id  %in% c(remove1, remove2, remove3),]
    Tags <- Tags[!Tags$pid %in% c(remove1, remove2, remove3),]
   
   ImageTags <- ImageTags[!ImageTags$tagid %in% c(remove1, remove2, remove3),]

   # combine multiple tags for images into single field "HierarchicalSubject"
   ImageTags_aggregate <- aggregate(ImageTags$cleartext_full,
                                    by = list(ImageTags$imageid),
                                    FUN = paste, sep  = "", collapse = ", ")

   # assign column names to output
   colnames(ImageTags_aggregate) <- c("imageid", "HierarchicalSubject")
   
   # assign HierarchicalSubject to matching images
   image_subset2$HierarchicalSubject <- ImageTags_aggregate$HierarchicalSubject[match(image_subset2$id, ImageTags_aggregate$imageid)]
  
   return(image_subset2) 
}

# process the "video" argument of recordTable

processVideoArgument <- function(IDfrom = IDfrom,
                                 video = video){
  
  if(!exists("file_formats",  where = video)) stop("'file_formats' is missing in argument 'video'", call. = FALSE)
  if(!exists("dateTimeTag",   where = video)) stop("'dateTimeTag' is missing in argument 'video'", call. = FALSE)
  
  file_formats <- video$file_formats
  
  if(any(duplicated(file_formats))) stop("There are duplicates in file_formats (in the video argument")
  
  # check file_formats argument
  if(!is.character(file_formats)) stop("'file_formats' in argument 'video' must be of class 'character'", call. = FALSE)
  file_formats <- tolower(file_formats)
  
  # access digiKam database, if required
  if(IDfrom == "metadata"){
    if(!exists("db_directory", where = video)) stop("'db_directory' is missing in argument 'video'", call. = FALSE)
    if(!exists("db_filename",  where = video)) stop("'db_filename' is missing in argument 'video'", call. = FALSE)
    if(!dir.exists(video$db_directory)) stop("directory 'db_directory' does not exist", call. = FALSE)
    if(!file.exists(file.path(video$db_directory, video$db_filename))) stop(paste("file 'db_filename' in directory 'db_directory' does not exist\n",
                                                                                  file.path(video$db_directory, video$db_filename)), call. = FALSE)
    
    if (!requireNamespace("RSQLite", quietly = TRUE)) {
      stop("the package 'RSQLite' is needed for extracting data from digiKam database,  you can install it via: install.packages('RSQLite')")
    } 
   
    # read digiKam database with helper function
    digiKam_data <- accessDigiKamDatabase (db_directory = video$db_directory,
                                           db_filename = video$db_filename)
  } else {    # if IDfrom != "metadata"
    digiKam_data <- NULL
  }
  return(list(digiKam_data = digiKam_data,
              file_formats = file_formats))
}


# if video files extracted, add DateTimeOriginal

addVideoDateTimeOriginal <- function(metadata.tmp,
                                     video){
  # if there's missing entries in DateTimeOriginal that are present in the video date time tag, copy the video tags over
  rows_of_interest1 <- which(metadata.tmp$DateTimeOriginal == "-" & 
                               metadata.tmp[,video$dateTimeTag] != "-")
  if(length(rows_of_interest1) >= 1) {
    metadata.tmp$DateTimeOriginal[rows_of_interest1] <- metadata.tmp[rows_of_interest1, video$dateTimeTag] 
  }
  metadata.tmp[, video$dateTimeTag] <- NULL
  
  return(metadata.tmp) 
}

# add HierachicalSubject for video files
addVideoHierarchicalSubject <- function(metadata.tmp,
                                        video,
                                        digiKamVideoMetadata,
                                        digiKamTablesList,
                                        videoFormat){
  
  if(nrow(digiKamVideoMetadata) == 0) return(metadata.tmp)
  # add HierarchialSubject for video files (match by filename, must be unique)

  # new version, should match filenames AND paths in digiKamVideoMetadata with metadata.tmp (can deal with duplicate file names in separate folders)
  # get (partial) album path for videos in digiKamVideoMetadata (for station / camera directory)
  # NOTE: no idea how to get the drive letter from volumeid / uuid in AlbumRoots, so it is missing here
  album_name_tmp  <- digiKamTablesList$Albums[match(digiKamVideoMetadata$album, digiKamTablesList$Albums$id), ]
  album_name_tmp2 <- merge(digiKamTablesList$AlbumRoots, album_name_tmp, by.x = "id", by.y = "albumRoot")
  album_name_tmp3 <- paste0(album_name_tmp2$specificPath, album_name_tmp2$relativePath)
  
  # combine partial path and filename of extracted videos 
  digiKamVideoMetadata$almostFullPath <-  file.path(album_name_tmp3, digiKamVideoMetadata$name)
  
  
  # subset metadata.tmp to video files only
  metadata.tmp.video <- metadata.tmp[tolower(substr(metadata.tmp$FileName, 
                                                    nchar(metadata.tmp$FileName) - 3, 
                                                    nchar(metadata.tmp$FileName))) %in% 
                                       paste(".", videoFormat, sep = ""), ]
  
  metadata.tmp.notvideo <- metadata.tmp[!tolower(substr(metadata.tmp$FileName, 
                                                    nchar(metadata.tmp$FileName) - 3, 
                                                    nchar(metadata.tmp$FileName))) %in% 
                                       paste(".", videoFormat, sep = ""), ]
  
  
  metadata.tmp.video$fullPath <- file.path(metadata.tmp.video$Directory, metadata.tmp.video$FileName)
  
  # order metadata.tmp.video.fullPath and metadata.tmp.video.fullPath
  
  metadata.tmp.video <- metadata.tmp.video[order(metadata.tmp.video$fullPath),]
  digiKamVideoMetadata <- digiKamVideoMetadata[order(digiKamVideoMetadata$almostFullPath),]
  
  
  # in metadata.tmp.video, strip drive letter (everything left of first forward slash)
  metadata.tmp.almostFullPath <- substr(metadata.tmp.video$fullPath, 
        start = (nchar(metadata.tmp.video$fullPath) - nchar(digiKamVideoMetadata$almostFullPath) + 1),
        stop = nchar(metadata.tmp.video$fullPath))

  # add metadata from digiKamVideoMetadata to metadata.tmp
  metadata.tmp.video$HierarchicalSubject_video <- digiKamVideoMetadata$HierarchicalSubject [match(metadata.tmp.almostFullPath, 
                                                                                              digiKamVideoMetadata$almostFullPath)]
  
  
  # find rows that have video metadata
  rows_of_interest2 <- which(!is.na(metadata.tmp.video$HierarchicalSubject_video) & 
                               metadata.tmp.video$HierarchicalSubject == "-")
  # write HierarchicalSubject of videos to the normal HierarchicalSubject column
  if(length(rows_of_interest2) >= 1) {
    metadata.tmp.video$HierarchicalSubject[rows_of_interest2] <- metadata.tmp.video$HierarchicalSubject_video[rows_of_interest2] 
  }
  
  
  metadata.tmp.out <- data.frame(rbindlist(list(metadata.tmp.video, metadata.tmp.notvideo), fill = TRUE))
  
  # remove column HierarchicalSubject_video
  metadata.tmp.out$HierarchicalSubject_video <- NULL
  metadata.tmp.out$fullPath <- NULL
  
  return(metadata.tmp.out)
}


# function to return fraction of day that has passed already at a given time (or its inverse, fraction of day remaining)
fractionOfDay <- function(time, type) {
  # time difference between time and midnight that day (fraction of the day that has passed already)
  delta <- as.numeric(difftime(time, as.Date(time), units = "days"))
  
  # return fraction of day that remains (for setup day)
  if(type == "after") return( 1 - delta)
  # return fraction of day that has passed (for retrieval day)
  if(type == "before")  return(delta)
}

# plot camera operation matrix (function from vignette 3)
camopPlot <- function(camOp, 
                      palette = "viridis",
                      lattice = FALSE){
  
  which.tmp <- grep(as.Date(colnames(camOp)), pattern = "01$")
  label.tmp <- format(as.Date(colnames(camOp))[which.tmp], "%Y-%m")
  at.tmp <- which.tmp / ncol(camOp)
  
  values_tmp <- sort(na.omit(unique(c(camOp))))
  
  # hcl.colors only exists in R >3.6.0, use heat.colors for earlier versions
  if(getRversion() >= "3.6.0") {
    image_colors <- grDevices::hcl.colors(n = length(values_tmp), palette = palette, rev = TRUE)
  } else {
    image_colors <- heat.colors(n = length(values_tmp), rev = TRUE)
  }

  
  # transpose and reverse x axis to make sure it plots in the same row/column order as toe original matrix
  if(nrow(camOp) > 1) {
    camop_for_plotting <- t(as.matrix(camOp)[seq(nrow(camOp) ,1),])
  } else {
    camop_for_plotting <- t(as.matrix(camOp))
  }
  
  # lattice::levelplot
  if(isTRUE(lattice)) {
    
    if (!requireNamespace("tibble", quietly = TRUE)) stop("package 'lattice' is required for levelplot (lattice = TRUE)")
    # generate color ramp 
    image_colors_lattice <- grDevices::hcl.colors(n = 100, palette = palette, rev = TRUE)
    
    
    # levelplot in lattice
    # to be improved: y axis labels (station IDs) are all drawn and can overlap. 
    # this may help: https://stat.ethz.ch/R-manual/R-devel/library/lattice/html/axis.default.html
    lattice::levelplot(camop_for_plotting,
              col.regions = image_colors_lattice,
              xlab = "", ylab ="",
              scales = list(x = list(at = which.tmp,
                                     labels = label.tmp, rot = 45)),
              aspect = "fill", 
    )

  } else {
    
    # grDevices::image()    quite basic and no legend
    image(camop_for_plotting, xaxt = "n", yaxt = "n", col = image_colors)
    axis(1, at = at.tmp, labels = label.tmp)
    axis(2, at = seq(from = 0, to = 1, length.out = nrow(camOp)), labels = rev(rownames(camOp)), las = 1)
    abline(v = at.tmp, col = rgb(0,0,0, 0.2))
    box()
  }
}

# intersect intervals, fast (adapted from lubridate)
#https://github.com/tidyverse/lubridate/blob/master/R/intervals.r
intersect.Interval.fast <- function(int1, int2, ...) {  # (x, y, ...) {

  starts <- pmax(int1@start, int2@start)
  ends <- pmin(int1@start + int1@.Data, int2@start + int2@.Data)
  spans <- as.numeric(ends) - as.numeric(starts)
  
  spans[spans < 0] <- NA
  spans
}



# surveyReport legacy version (from 2.0.3)
# intended to run instead of new surveyReport to prevent error when re-running old code

surveyReport_legacy <- function(recordTable,
                         CTtable,
                         speciesCol = "Species",
                         stationCol = "Station",
                         cameraCol,
                         setupCol,
                         retrievalCol,
                         CTDateFormat = "%Y-%m-%d",
                         CTHasProblems = FALSE,
                         recordDateTimeCol = "DateTimeOriginal",
                         recordDateTimeFormat = "%Y-%m-%d %H:%M:%S",
                         Xcol,
                         Ycol,
                         sinkpath,
                         makezip
){
  
  # check column names
  checkForSpacesInColumnNames(stationCol = stationCol, setupCol = setupCol, retrievalCol = retrievalCol,
                              recordDateTimeCol = recordDateTimeCol, speciesCol = speciesCol)
  
  CTtable     <- dataFrameTibbleCheck(df = CTtable)
  recordTable <- dataFrameTibbleCheck(df = recordTable)
  
  if(!stationCol %in% colnames(CTtable))    stop(paste('stationCol = "',   stationCol,   '" is not a column name in CTtable', sep = ''), call. = FALSE)
  if(!setupCol %in% colnames(CTtable))      stop(paste('setupCol = "',     setupCol,     '" is not a column name in CTtable', sep = ''), call. = FALSE)
  if(!retrievalCol %in% colnames(CTtable))  stop(paste('retrievalCol = "', retrievalCol, '" is not a column name in CTtable', sep = ''), call. = FALSE)
  
  if(!stationCol %in% colnames(recordTable))         stop(paste('stationCol = "',   stationCol,  '" is not a column name in recordTable', sep = ''), call. = FALSE)
  if(!recordDateTimeCol %in% colnames(recordTable))  stop(paste('recordDateTimeCol = "', recordDateTimeCol,  '" is not a column name in recordTable', sep = ''), call. = FALSE)
  if(!speciesCol %in% colnames(recordTable))         stop(paste('speciesCol = "', speciesCol,  '" is not a column name in recordTable', sep = ''), call. = FALSE)
  
  
  # make columns character
  recordTable[,speciesCol]        <- as.character(recordTable[,speciesCol])
  recordTable[,stationCol]        <- as.character(recordTable[,stationCol])
  recordTable[,recordDateTimeCol] <- as.character(recordTable[,recordDateTimeCol])
  
  CTtable[,stationCol]    <- as.character(CTtable[,stationCol])
  CTtable[,setupCol]      <- as.character(CTtable[,setupCol])
  CTtable[,retrievalCol]  <- as.character(CTtable[,retrievalCol])
  
  
  
  if(hasArg(makezip)){
    stopifnot(is.logical(makezip))
  } else {
    makezip <- FALSE
  }
  
  if(isTRUE(makezip)){
    if(hasArg(sinkpath) == FALSE) stop("if makezip is TRUE, please define sinkpath")
    if(!dir.exists(sinkpath)) stop("Could not find sinkpath:\n", sinkpath, call. = FALSE)
    
    if(Sys.getenv("R_ZIPCMD") == "" & 
       Sys.getenv("zip") == "") {
      if(isFALSE(requireNamespace("zip", quietly = TRUE))) {
        stop('cannot create zip file. Try installing the package "zip" via:  install.packages("zip")', call. = FALSE)
      } else {
        requireNamespace("zip")
        usePackageZip <- TRUE
      }
    } else {
      usePackageZip <- FALSE
    }
  }
  
  if(hasArg(cameraCol)){
    if(cameraCol %in% colnames(CTtable) == FALSE) stop(paste(cameraCol, "is not a column of CTtable"))
  } else {
    if(any(table(CTtable[,stationCol]) > 1)){
      stop("at least 1 station has more than 1 item in CTtable. Please specify 'cameraCol'")
    }
  }
  
  if(hasArg(Xcol)){
    stopifnot(hasArg(Ycol))
    stopifnot(c(Xcol, Ycol) %in% colnames(CTtable))
    CTtable[,Xcol] <- as.numeric(as.character(CTtable[,Xcol]))
    CTtable[,Ycol] <- as.numeric(as.character(CTtable[,Ycol]))
  } 
  
  
  recordTable$DateTime2 <- parseDateTimeObject(inputColumn = recordTable[,recordDateTimeCol],
                                               dateTimeFormat = recordDateTimeFormat,
                                               timeZone = "UTC")
  recordTable$Date2 <- as.Date(recordTable$DateTime2, tz = "UTC")
  
  
  # if("POSIXlt" %in% class(recordTable$DateTime2) == FALSE) stop("couldn't interpret recordDateTimeCol of recordTable using specified recordDateTimeFormat")
  # if(any(is.na(recordTable$DateTime2))) stop(paste("at least 1 entry in recordDateTimeCol of recordTable could not be interpreted using recordDateTimeFormat. row",
  #                                                  paste(which(is.na(recordTable$DateTime2)), collapse = ", ")))
  
  if(all(as.character(unique(recordTable[,stationCol])) %in% CTtable[,stationCol]) == FALSE){
    (stop("Not all values of stationCol in recordTable are matched by values of stationCol in CTtable"))
  }
  
  # check date columns and format
  
  CTtable[,setupCol]     <- parseDateObject(inputColumn = CTtable[,setupCol],     CTDateFormat, checkNA = TRUE, checkEmpty = TRUE)
  CTtable[,retrievalCol] <- parseDateObject(inputColumn = CTtable[,retrievalCol], CTDateFormat, checkNA = TRUE, checkEmpty = TRUE)
  
  
  if(isTRUE(CTHasProblems)){    # camera problem columns
    
    # check that problems are arranged in order 1,2,3,...
    cols.prob.from <- grep(colnames(CTtable), pattern = "Problem\\d\\Sfrom")
    cols.prob.to   <- grep(colnames(CTtable), pattern = "Problem\\d\\Sto")
    
    if(length(cols.prob.from) == 0) stop("could not find column ProblemX_from")
    if(length(cols.prob.to) == 0)   stop("could not find column ProblemX_to")
    
    if(all(order(colnames(CTtable)[cols.prob.from]) == seq(1:length(cols.prob.from))) == FALSE){"problem columns are not arranged correctly"}
    if(all(order(colnames(CTtable)[cols.prob.to])   == seq(1:length(cols.prob.to)))   == FALSE){"problem columns are not arranged correctly"}
    
    if(length(cols.prob.from) != length(cols.prob.to)){
      stop("number of 'Problem..._from' and 'Problem..._to' columns differs. Check column names Sample: 'Problem1_from', 'Problem1_to'")
    }
    
    n_days_inactive <- data.frame(matrix(NA,
                                         ncol = length(cols.prob.from),
                                         nrow = nrow(CTtable)))
    
    for(xy in 1:length(cols.prob.from)){
      
      if(isTRUE(unlist(strsplit(colnames(CTtable)[cols.prob.from[xy]], split = "_"))[1] !=
                unlist(strsplit(colnames(CTtable)[cols.prob.to[xy]], split = "_"))[1])) stop (
                  paste("problem columns are arranged incorrectly (",
                        colnames(CTtable)[cols.prob.from[xy]], ", ",
                        colnames(CTtable)[cols.prob.to  [xy]], ")",
                        sep = "")
                )
      
      CTtable[, cols.prob.from[xy]] <- parseDateObject(inputColumn = CTtable[, cols.prob.from[xy]], CTDateFormat, checkNA = FALSE, checkEmpty = FALSE)
      CTtable[, cols.prob.to[xy]]   <- parseDateObject(inputColumn = CTtable[, cols.prob.to[xy]],   CTDateFormat, checkNA = FALSE, checkEmpty = FALSE)
      
      n_days_inactive[,xy] <- CTtable[cols.prob.to[xy]] - CTtable[cols.prob.from[xy]]       # compute number of inactive trap nights
      n_days_inactive[,xy] <- as.integer(n_days_inactive[,xy])
    }
    for(xyz in cols.prob.from){
      if(any(CTtable[,setupCol] > CTtable[,xyz], na.rm = TRUE)){
        stop(paste(paste(CTtable[which(CTtable[,setupCol] > CTtable[,xyz]), stationCol], collapse = ", "), ": Problem begins before Setup"))
      }
    }
    for(xyz2 in cols.prob.to){
      if(any(CTtable[,retrievalCol] < CTtable[,xyz2], na.rm = TRUE)){
        stop(paste(paste(CTtable[which(CTtable[,retrievalCol] < CTtable[,xyz2]), stationCol], collapse = ", "), ": Problem ends after retrieval"))
      }
    }
    rm(xy, xyz, xyz2)
    
    n_days_inactive_rowsum <- rowSums(n_days_inactive, na.rm = TRUE)
  } else {
    n_days_inactive_rowsum <- rep(0, times = nrow(CTtable))
  }
  stopifnot(nrow(n_days_inactive_rowsum) == nrow(CTtable))
  
  n_days_inactive_rowsum <- aggregate(n_days_inactive_rowsum,
                                      by    = list(CTtable[,stationCol]),
                                      FUN   = sum,
                                      na.rm = TRUE)
  
  
  # adjust options for printing results
  options.tmp <- options()
  on.exit(options(options.tmp))
  options(max.print=1e6)
  options(width = 1000)
  
  # station and image date ranges
  station.tmp1 <- aggregate(CTtable[,setupCol],
                            list(CTtable[,stationCol]),
                            FUN = min)
  station.tmp2 <- aggregate(CTtable[,retrievalCol],
                            list(CTtable[,stationCol]),
                            FUN = max)
  image.tmp1   <- aggregate(recordTable$Date2,
                            list(recordTable[,stationCol]),
                            FUN = min)
  image.tmp2   <- aggregate(recordTable$Date2,
                            list(recordTable[,stationCol]),
                            FUN = max)
  
  
  n_nights_total      <- as.integer(CTtable[,retrievalCol] - CTtable[,setupCol])
  n_nights_total_agg  <- aggregate(n_nights_total,
                                   by  = list(CTtable[,stationCol]),
                                   FUN = sum)
  n_cameras_total_agg <- aggregate(CTtable[,stationCol],
                                   by  = list(CTtable[,stationCol]),
                                   FUN = length)
  n_nights_active     <- n_nights_total_agg[,2] - n_days_inactive_rowsum[,2]
  
  date_range_combined <- data.frame(station.tmp1[,1], station.tmp1[,2],
                                    image.tmp1[match(station.tmp1[,1], image.tmp1[,1]),2],
                                    image.tmp2[match(station.tmp1[,1], image.tmp2[,1]),2],
                                    station.tmp2[,2],
                                    n_nights_total_agg[,2],
                                    n_nights_active,
                                    n_cameras_total_agg[,2])
  colnames(date_range_combined) <- c(stationCol, "setup_date",  "first_image_date", "last_image_date", "retrieval_date",
                                     "n_nights_total", "n_nights_active", "n_cameras")
  rownames(date_range_combined) <- NULL
  
  
  # sink/print output
  
  
  if(hasArg(sinkpath)){
    sinkfile <- file.path(sinkpath, paste("survey_report_", Sys.Date(), ".txt", sep = ""))
    sink(file = sinkfile)
    print(paste("Survey Report generated", Sys.Date() ))
  }
  cat("\n-------------------------------------------------------\n")
  print(paste("Total number of stations: ", length(unique(CTtable[,stationCol]))))
  cat("\n-------------------------------------------------------\n")
  print(paste("Number of operational stations: ", length(which(n_nights_active >= 1))))
  cat("\n-------------------------------------------------------\n")
  
  if(hasArg(cameraCol)){
    print(paste("Total number of cameras: ", length(unique(paste(CTtable[,stationCol], CTtable[,cameraCol], sep = "_")))))
    cat("\n-------------------------------------------------------\n")
    
    print(paste("n nights with cameras set up (operational or not): ",
                sum(n_nights_total, na.rm = TRUE)))
    cat("\n-------------------------------------------------------\n")
    print(paste("n nights with cameras set up and active (trap nights): ",
                sum(n_nights_active, na.rm = TRUE)))
  } else {
    print(paste("n nights with cameras set up (operational or not. NOTE: only correct if 1 camera per station):",
                sum(n_nights_total, na.rm = TRUE)))
    cat("\n-------------------------------------------------------\n")
    print(paste("n nights with cameras set up and active (trap nights. NOTE: only correct if 1 camera per station):",
                sum(n_nights_active, na.rm = TRUE)))
  }
  cat("\n-------------------------------------------------------\n")
  print(paste("total trapping period: ", paste(min(station.tmp1[,2]), max(station.tmp2[,2]), sep = " - ")))
  
  
  
  # total number of independent records by species
  species_record_table <- data.frame(species    = rep(NA, times = length(unique(recordTable[, speciesCol]))),
                                     n_events   = rep(NA, times = length(unique(recordTable[, speciesCol]))), 
                                     n_stations = rep(NA, times = length(unique(recordTable[, speciesCol]))))
  
  for(i in 1:length(unique(recordTable[, speciesCol]))){
    
    tmp                           <- unique(recordTable[, speciesCol])[i]
    subset.tmp                    <- subset(recordTable, recordTable[, speciesCol] == tmp)
    species_record_table[i, ]     <- c(tmp, nrow(subset.tmp), length(unique(subset.tmp[,stationCol])))
    rm(subset.tmp, tmp)
  }
  species_record_table2           <- species_record_table[order(species_record_table$species),]
  rownames(species_record_table2) <- NULL
  
  # total number of independent records by station
  
  # only species that were recorded
  station_record_table1           <- aggregate(recordTable[,1], by = list(recordTable[,stationCol],recordTable[,speciesCol]), FUN = length)
  colnames(station_record_table1) <- c(stationCol, speciesCol, "n_events")
  station_record_table1           <- station_record_table1[order(station_record_table1[,stationCol], station_record_table1[,speciesCol]),]
  rownames(station_record_table1) <- NULL
  
  #including all species and 0s
  station_record_table           <- expand.grid(sort(unique(recordTable[,stationCol])), sort(unique(recordTable[,speciesCol])))
  station_record_table           <- data.frame(station_record_table, n_events = 0)
  colnames(station_record_table) <- c(stationCol, speciesCol, "n_events")
  rownames(station_record_table) <- NULL
  # species lists by station
  
  n_spec_by_station           <- aggregate(station_record_table1[,speciesCol], by = list(station_record_table1[,stationCol]), FUN = length)
  colnames(n_spec_by_station) <- c(stationCol, "n_species")
  rownames(n_spec_by_station) <- NULL
  
  for(j in 1:length(unique(recordTable[, stationCol]))){
    
    tmp                      <- unique(recordTable[, stationCol])[j]
    subset.tmp               <- table(subset(recordTable, recordTable[, stationCol] == tmp)[,speciesCol] )
    
    station_record_table.tmp <- station_record_table[station_record_table[, stationCol] == tmp,]
    station_record_table.tmp$n_events[match(names(subset.tmp), station_record_table.tmp[,speciesCol])] <- subset.tmp
    
    station_record_table[station_record_table[, stationCol] == tmp,] <- station_record_table.tmp
    rm(station_record_table.tmp)
  }
  station_record_table2 <-  station_record_table[order(station_record_table[,stationCol], station_record_table[,speciesCol]),]
  rownames(station_record_table2) <- NULL
  
  if(hasArg(sinkpath)){
    cat("\n\n-------------------------------------------------------\n\n")
    print(" survey station and image date ranges")
    print(date_range_combined)
    cat("\n\n-------------------------------------------------------\n\n")
    print(" number of species by station")
    print(n_spec_by_station)
    cat("\n\n-------------------------------------------------------\n\n")
    print(" number of events and station by species")
    print(species_record_table2)
    cat("\n\n-------------------------------------------------------\n\n")
    print(" number of events and species by station (only species that were recorded at stations)")
    print(station_record_table1)
    cat("\n\n-------------------------------------------------------\n\n")
    print(" number of events and species by station (all species, all stations, including species that were not recorded)")
    print(station_record_table2)
    sink()
    message("saved output to file \n",
            paste(sinkfile, "\n\n"))
  }
  
  output <- list(date_range_combined, n_spec_by_station, species_record_table2, station_record_table1, station_record_table2)
  names(output) <- c("survey_dates", "species_by_station", "events_by_species",
                     "events_by_station", "events_by_station2")
  
  # make zip file
  if(isTRUE(makezip)){
    
    arglist_zip <-  list(output               = output,
                         recordTable          = recordTable,
                         CTtable              = CTtable,
                         speciesCol           = speciesCol,
                         stationCol           = stationCol,
                         setupCol             = setupCol,
                         retrievalCol         = retrievalCol,
                         CTDateFormat         = CTDateFormat,
                         CTHasProblems        = CTHasProblems,
                         recordDateTimeCol    = recordDateTimeCol,
                         recordDateTimeFormat = recordDateTimeFormat,
                         sinkpath             = sinkpath,
                         usePackageZip        = usePackageZip)
    
    if(hasArg(Xcol) & hasArg(Ycol)) arglist_zip <- c(arglist_zip,  Xcol = Xcol, Ycol = Ycol)
    
    do.call(makeSurveyZip, arglist_zip)
    
  }
  
  return(invisible(output))
}

Try the camtrapR package in your browser

Any scripts or data that you put into this service are public.

camtrapR documentation built on May 11, 2022, 5:18 p.m.