R/find_thresholds.R

Defines functions find_thresholds

Documented in find_thresholds

#'Find thresholds of diffusive spread 
#'
#'Find thresholds of diffusive spread and get a list of potential jumps. 
#'This function identifies spatial discontinuities in the distribution of 
#'species’ occurrences between the putative limit of the continuous, diffusive 
#'spread (i.e., the invasion front) and outlying occurrences.
#'This function must be applied after attribute_sectors() and before find_jumps().
#'
#'@export
#'
#'@param dataset A dataset to be processed, output of attribute_sectors()
#'@param gap_size Distance between the invasion front and a positive point
#'necessary for it to be considered a jump, in kilometers (default: 15)
#'@param negatives Should the function verify that negative surveys were found 
#'in the spatial discontinuities?
#'
#'@return Two tables: one table \code{Thresholds} containing the threshold per year and sector,
#'one table \code{potJumps} containing the list of potential jumps for find_jumps()
#' 
#'@examples
#' thresholds <- find_thresholds(dataset)


find_thresholds <- function(dataset, 
                       gap_size = 15,
                       negatives = TRUE) {
  
  cat(paste(Sys.time(), "Start finding thresholds... ")) 
            
  # Initialize variables to store the results
  rotation = unique(sort(dataset$rotation_nb)) # Number of rotations used in the dataset
  sector = unique(sort(dataset$sectors_nb)) # Number of sectors used in the dataset
  year = unique(sort(dataset$year)) # Years considered
  Dist = NULL # Will store the thresholds of the diffusive spread
  Jumps_alls = NULL # Will store potential jumps for all sectors in a rotation
  Jumps_allr = NULL # Will store potential jumps for all rotations
  
  # calculate the distance at which points that have the same 'DistToIntro' may be distant of more than 'gap size'
  # Calculate the central angle in radians
  # theta <- 2 * pi / max(sector)
  # Calculate the radius using the formula
  # limitRadius <- gap_size / (2 * sin(theta / 2)) # circle radius (in km) at which 2 points start being distant of more than 'gap size' 


  
  for (rot in rotation){ # for each rotation in the dataset
    cat(paste0("\n Rotation ", rot, "/", max(rotation), ": "))
    dataset_rot <- dataset %>% dplyr::filter(rotation_nb == rot) # Create a dataset for this rotation only
    
    for (s in sector){ # for each sector in this rotation
      dataset_n = NULL # initialize an empty object to cumulate datasets year after year
      jumps_sector = data.frame(DistToIntro = 0) # initialize an empty object to store jumps in each sector
      
      if (s == 1 & s == max(sector)) {
        cat(paste0("Sector ", as.numeric(s), "/", max(sector), "\n"))
      } else if(s == 1){
        cat(paste0("Sector ", as.numeric(s), "/", max(sector), "... "))
      } else if (s == max(sector)) {
        cat(paste0(" ", as.numeric(s), "/", max(sector), "\n"))
      } else {
        cat(paste0(" ", as.numeric(s), "/", max(sector), "... "))
      } 
      
      for (y in year){ # for each year in the dataset
        
        # Select the dataset to be examined, with the correct rotation, sector, and year
        # Additionally, we assume that no population is going extinct over the years 
        # and keep datasets from previous years. 
        dataset_n <- dataset_rot %>% dplyr::filter(sectors_nb == s & 
                                              year %in% c(year[1]:y) & 
                                              established == T) %>%                    
          arrange(year) # Order rows by year to facilitate the next operations.
        
        
        if (dim(dataset_n)[1] == 0){ # If there are less than two points in the sector up to that year, 
          next # go to the next sector
        } else if (dim(dataset_n)[1] == 1) { threshold = dataset_n$DistToIntro[1] }
        
        # Initialize iterations to identify spatial discontinuities in species presence
        i = 1 # initialize point 1
        # distancei = 1 # initialize distance for point 1
        j = 2 # initialize point 2
        # distancej = 2 # initialize distance for point 2
        
        # Order the distance variable by increasing order
        distance_sorted <- sort(dataset_n$DistToIntro)
        continue = T
        
        while (j <= length(distance_sorted) & continue) { # until the variable is finished or a jump is found
            distancei = distance_sorted[i]  
            distancej = distance_sorted[j] 

          if (distancei + gap_size < distancej ) { # if j is a jump according to DistToIntro, 
            threshold = distance_sorted[i] #the threshold of the diffusive spread is i
            continue = F
            
            if(negatives){ # if user indicated to look for negative surveys in the gap, 
              # make sure that the gap is not due to an absence of surveys 
              # select the dataset of negative surveys corresponding to the discontinuity
              dataset_total <- dataset_rot %>% dplyr::filter(established == FALSE &
                                                             sectors_nb == s & 
                                                             year %in% c(year[1]:y) & 
                                                             between(DistToIntro, threshold, threshold+gap_size))
              if (dim(dataset_total)[1] == 0) { # if this dataset is empty, send a warning
                cat(paste0("Warning: no negative survey in the gap identified in rotation ", 
                             rot, ", sector ", s, " and year ", y, " after distance ", round(threshold,0), 
                             ". The spatial discontinuity that is identified may be due to few surveys done in this area. Consider decreasing the number of sectors \n")) 
              }
            }
            
          } else { # if there is no jump according to DistToIntro, let's check that j is not far from any point (i isn't)
            # if (distancej > limitRadius){ # and pairwise point distances may be larger than the difference in DistToIntro
              rowNumber = which(grepl(distancej, dataset_n$DistToIntro)) # find the corresponding point in the dataset 
              if (length(rowNumber) > 1) { rowNumber = tail(rowNumber, n = 1)} # If the threshold has been the same for several years, select the survey from the last year
              pointj = dataset_n[rowNumber,] # find the corresponding line
              dataset_test = dataset_n[-rowNumber,] # take the dataset without this point
              dataset_test %<>% dplyr::filter(dplyr::between(DistToIntro, pointj$DistToIntro - gap_size, pointj$DistToIntro)) # check if there are points close to this point 
              pairwise_dist <- geosphere::distGeo(dataset_test[c(3,2)], pointj[c(3,2)])/1000 
              if (min(pairwise_dist) > gap_size){
                threshold = distance_sorted[i]
                continue = F
                
                if(negatives){ # if user indicated to look for negative surveys in the gap, 
                  # make sure that the gap is not due to an absence of surveys 
                  # select the dataset of negative surveys corresponding to the discontinuity
                  dataset_total <- dataset_rot %>% dplyr::filter(established == FALSE &
                                                                   sectors_nb == s & 
                                                                   year %in% c(year[1]:y) & 
                                                                   between(DistToIntro, pointj$DistToIntro - gap_size, pointj$DistToIntro))
                  pairwise_dist <- geosphere::distGeo(dataset_total[c(3,2)], pointj[c(3,2)])/1000 
                  if (dim(dataset_total)[1] == 0) { # if this dataset is empty, send a warning
                    cat(paste0("Warning: no negative survey in the gap identified in sector ", s, " and year ", y, " around point (", 
                               round(longitude), ",", round(latitude), 
                               "). The spatial discontinuity that is identified may be due to few surveys in this area. Consider decreasing the number of sectors \n")) 
                  }
                }
                }
          # }
          }
            i = i+1 # go to the next pair of points
            j = j+1 # go to the next pair of points
        }
        if(continue){threshold = distance_sorted[i]}
  
        
        # if (distancei + gap_size > distancej) { # if there is no potential jump identified (all points are in a window of 15 km to the introduction point)  
        #     threshold = distance_sorted[i] # the threshold of the diffusive spread is the distance of the last point i
        # } else { # if a jump was found, the threshold of the diffusive spread is the value of the previous iteration
        #   threshold = distance_sorted[i-1]
          
        
        
        # Now that we know the threshold distance, find the corresponding survey that is in the initial table.
        rowNumber = which(grepl(threshold, dataset_n$DistToIntro)) # extract the row number of this survey
        if (length(rowNumber) > 1) { rowNumber = tail(rowNumber, n = 1)} # If the threshold has been the same for several years, select the survey from the last year
        
        # Store this threshold in an object
        threshold_survey = dataset_n[rowNumber,]
        
        # Make sure the threshold is associated to the year we are looking at, even if the threshold is the same as the year before
        threshold_survey$year <- y
        jump_survey = dataset_n %>% dplyr::filter(DistToIntro >= threshold & year == y) # all points after this threshold are potential jumps and stored
        
        # Add results at each iteration (sector, rotation, year)
        Dist = rbind(Dist, threshold_survey) # add this threshold to the list of thresholds
        jumps_sector = dplyr::bind_rows(jumps_sector, jump_survey) # add the potential jumps to the list of potential jumps
      }
      
      Jumps_alls = rbind(Jumps_alls, jumps_sector[-1,]) # add potential jumps from this sector
    }
    
    Jumps_allr = rbind(Jumps_allr, Jumps_alls) # add potential jumps from this rotation
  } 

  # Reduce the lists to unique points (without repetitions due to rotations)
  Jumps_all = Jumps_allr %>% dplyr::select(-c(sectors_nb, rotation_nb, sectors)) %>% 
    unique()
  preDist = Dist %>% dplyr::select(-c(sectors_nb, rotation_nb, sectors)) %>% 
    unique()
  
  Jumps <- dplyr::setdiff(Jumps_all, preDist) # Remove jumps that are identified as preDist (non-jumps) in some rotations

  # This is a list of points that are away from the core invasion, i.e. potential jumps
  # It may still include points from the core invasion due to the separation in sectors.
  
  # Send an info message
  cat(paste("Threshold analysis done.", dim(Jumps)[1], "potential jumps were found. \n"))
  # select the results to return
  results <- list("preDist" = Dist, "potJumps" = Jumps)
  
  return(results)
} 
nbelouard/slfjumps documentation built on July 27, 2024, 8:28 a.m.