# R/sampling_effort.R
#
# Defines functions: sampling_effort
#
# Documented in: sampling_effort

#' @title iNaturalist Sampling Effort Estimation
#'
#' @description Summarises, for every observer in an iNaturalist API pull, the
#'   number of observations, the time and distance spent in continuous
#'   sampling, and the number of observations per iconic taxon.
#'
#' @details Rows with a missing user id, observation time, iconic taxon name or
#'   uuid are dropped. The remaining observations are sorted by their
#'   `time_observed_at` string and processed per observer. For each pair of
#'   consecutive observations of one observer the time gap (minutes) and the
#'   distance returned by `distGeo()` are computed. `distGeo()` is not defined
#'   in this file; it is presumably `geosphere::distGeo()` and must be
#'   available to the package (TODO confirm the import).
#'
#'   A gap contributes to `sampling_effort_mins` when it is no larger than
#'   `effort_time_lim`. Its distance contributes to `sampling_effort_m_dist`
#'   when, in addition, the distance is below `max_geo_jump` **or** the implied
#'   speed (distance / gap) is below `max_m_per_min`.
#'
#' @param iNatdf Raw observation data frame generated by API pull using the
#'   iNat() function. Must contain the columns `user.id`, `time_observed_at`,
#'   `taxon.iconic_taxon_name` and `uuid`; `geojson.coordinates` is read
#'   whenever an observer has more than one observation.
#'
#' @param effort_time_lim Maximum time (in minutes) between consecutive
#'   observations that count as single, continuous sampling effort
#'
#' @param max_geo_jump Distance (in meters) below which a step between
#'   consecutive observations is always included in the sampling distance
#'   effort calculation
#'
#' @param max_m_per_min Travel speed (meters per minute) below which a step is
#'   included in the sampling distance effort calculation even if it is
#'   `max_geo_jump` or longer
#'
#' @return A numeric matrix with one row per observer (row names are the user
#'   ids) and 18 columns: `tot_obs`, `effort_based_obs` (number of qualifying
#'   gaps plus one, or 0 when no gap qualifies), `sampling_effort_mins`,
#'   `sampling_effort_m_dist`, followed by one count column per iconic taxon.
#'   A zero-row matrix is returned when no complete observation remains.
#'
#' @examples
#' \dontrun{
#' # `obs` is a data frame previously returned by iNat()
#' effort <- sampling_effort(obs, effort_time_lim = 30)
#' }
#'
#' @export sampling_effort

sampling_effort <- function(iNatdf = NULL, effort_time_lim = 30, max_geo_jump = 100, max_m_per_min = 900) {
  needed <- c("user.id", "time_observed_at", "taxon.iconic_taxon_name", "uuid")
  if (!is.data.frame(iNatdf) || !all(needed %in% names(iNatdf))) {
    stop("`iNatdf` must be a data frame containing the columns: ",
         paste(needed, collapse = ", "), call. = FALSE)
  }

  taxa <- c("Actinopterygii", "Animalia", "Amphibia", "Arachnida", "Aves",
            "Chromista", "Fungi", "Insecta", "Mammalia", "Mollusca",
            "Reptilia", "Plantae", "Protozoa", "unknown")
  out_cols <- c("tot_obs", "effort_based_obs", "sampling_effort_mins",
                "sampling_effort_m_dist", taxa)

  # Character matrix: [,1] user id, [,2] timestamp, [,3] iconic taxon, [,4] uuid.
  obs <- cbind(as.character(iNatdf$user.id),
               as.character(iNatdf$time_observed_at),
               as.character(iNatdf$taxon.iconic_taxon_name),
               as.character(iNatdf$uuid))
  obs <- obs[rowSums(is.na(obs)) == 0, , drop = FALSE]

  if (nrow(obs) == 0L) {
    return(matrix(0, nrow = 0L, ncol = length(out_cols),
                  dimnames = list(NULL, out_cols)))
  }

  # `drop = FALSE` keeps the matrix shape even when a single row is left.
  obs <- obs[order(obs[, 2]), , drop = FALSE]

  # Reformat only the timestamp column: replace the "T" separator with a
  # space and strip the trailing 6 characters (the UTC offset, e.g. "-04:00").
  # The other columns (notably the uuid used for look-ups) stay untouched.
  stamps <- gsub("T", " ", obs[, 2], fixed = TRUE)
  obs[, 2] <- substr(stamps, 1, nchar(stamps) - 6)

  users <- unique(obs[, 1])
  samp.effort <- matrix(0, nrow = length(users), ncol = length(out_cols),
                        dimnames = list(users, out_cols))
  all_uuids <- as.character(iNatdf$uuid)

  for (j in seq_along(users)) {
    user_obs <- obs[obs[, 1] == users[j], , drop = FALSE]
    n_obs <- nrow(user_obs)

    samp.effort[j, "tot_obs"] <- n_obs
    # Count taxa from the taxon column only; names outside `taxa` are ignored.
    samp.effort[j, taxa] <- tabulate(match(user_obs[, 3], taxa),
                                     nbins = length(taxa))

    # A single observation has no gaps: effort columns stay at 0.
    if (n_obs < 2L) {
      next
    }

    # Minutes between each observation and the next one.
    times <- as.POSIXct(user_obs[, 2])
    gaps <- as.numeric(difftime(times[-1L], times[-n_obs], units = "mins"))

    # Coordinates of each observation, looked up by uuid (first match).
    coords <- iNatdf$geojson.coordinates[match(user_obs[, 4], all_uuids)]
    dists <- vapply(seq_len(n_obs - 1L), function(i) {
      distGeo(coords[[i]], coords[[i + 1L]])
    }, numeric(1))

    # Summaries are computed once, from the complete gap/distance vectors.
    in_effort <- gaps <= effort_time_lim
    if (any(in_effort)) {
      # NOTE(review): "+ 1" assumes all qualifying gaps form one session;
      # with several separate sessions this may undercount -- confirm intent.
      samp.effort[j, "effort_based_obs"] <- sum(in_effort) + 1
    }
    samp.effort[j, "sampling_effort_mins"] <- sum(gaps[in_effort])

    plausible <- dists < max_geo_jump | (dists / gaps) < max_m_per_min
    samp.effort[j, "sampling_effort_m_dist"] <- sum(dists[in_effort & plausible])
  }

  samp.effort
}
# pjhanly/iNatTools documentation built on Oct. 30, 2022, 1:36 a.m.