#' Calculate dataset metrics
#'
#' Takes a formattedOccData object and the name of a species. Runs a set of calculations on the dataset for that species
#' Based on rules of thumb. Original version by Mark Logie; integrated into sparta by NJBI
#'
#' @param sp character string identifying the species name
#' @param formattedData output from formattedOccData
#' @param verbose Logical. Default (`FALSE`) returns seven metrics.
#'
#' @references Pocock, Logie, Isaac, Outhwaite & August (2019).
#' Rapid assessment of the suitability of multi-species citizen
#' science datasets for occupancy trend analysis. \emph{bioRxiv} 813626 doi:10.1101/813626.
#' @export
dataMetrics <- function(sp, formattedData, verbose = FALSE){
tFDall <- merge(formattedData$occDetdata,
formattedData$spp_vis)
tFD <- tFDall[tFDall[,names(tFDall) == sp],]
if(nrow(tFD)==0){
# No data, so set a few variables to 0. The reason for using 0's as defaults is if
# there is no data, these parameters should be 0 e.g. median number of visits = 0.
# Without this default, many parameters return NA, which is less accurate.
nyears <- Spnsite <- Spnvisits <- SpvisitPerSite <- SpvisitPerYear <-
avVisitPerYear <- median <- P70 <- P80 <- P90 <- sdVisitsPerYear <-
coeffVar <- zmedian <- zP70 <- zP80 <- zP90 <- zsdVisitsPerYear <-
zcoeffVar <- visits_median <- visits_P70 <- visits_P80 <- visits_P90 <-
prop_repeats_spc <- prop_repeats_grp <- prop_of_years <- prop_list_one <-
prop_abs_list <- Totalnvisits <- Totalnsites <- TotalvisitPerYear <- 0
prop_abs <- 1
} else {
# range of years with data
nyears <- (1+max(tFD$TP)-min(tFD$TP))
# Number of sites species is observed at
Spnsite <- length(unique(tFD$site))
# Number of visits on which the species is observed
Spnvisits <- length(unique(tFD$visit))
# Number of visits per site
SpvisitPerSite <- Spnvisits/Spnsite
# Number of visits per year
SpvisitPerYear <- Spnvisits/nyears
# average visits per year
visits_year <- tapply(tFD$visit, tFD$TP,
FUN = function(x) length(unique(x)))
avVisitPerYear = mean(visits_year)
# find 70, 80 and 90th percentiles
percentiles <- quantile(visits_year,c(.5,.7,.8,.9))
median <- as.numeric(percentiles[1])
P70 <- as.numeric(percentiles[2])
P80 <- as.numeric(percentiles[3])
P90 <- as.numeric(percentiles[4])
if(nrow(tFD)==1){
sdVisitsPerYear <- coeffVar <- 1
} else {
# sd visits per year
sdVisitsPerYear = sd(visits_year)
# coefficient of variation
coeffVar <- sd(visits_year)/mean(visits_year)
}
unique_years <- unique(tFD$TP)
all_years <- min(tFDall$TP):max(tFDall$TP)
missing_years <- all_years[!(all_years %in% unique_years)]
visits_year <- c(visits_year,rep(0,length(missing_years)))
# find 70, 80 and 90th percentiles
percentiles <- quantile(visits_year,c(.5,.7,.8,.9))
zmedian <- as.numeric(percentiles[1])
zP70 <- as.numeric(percentiles[2])
zP80 <- as.numeric(percentiles[3])
zP90 <- as.numeric(percentiles[4])
# sd visits per year
zsdVisitsPerYear = sd(visits_year)
# coefficient of variation
zcoeffVar <- sd(visits_year)/mean(visits_year)
# repeat visits
# Within each year get the counts of visits to each location
#repeats <- count(tFD, site, TP) # throws an error
repeats <- subset(as.data.frame(with(tFD, table(site, TP))), Freq > 0)
repeats$concat <- paste0(repeats$site,'-',repeats$TP)
# What proportion of these are > 1
# this metric is restricted to just the dataset with records for this species
prop_repeats_spc <- sum(repeats$Freq > 1) / nrow(repeats)
# visits within the group for visits to each location within a year
group_repeats <- subset(as.data.frame(with(tFDall, table(site, TP))), Freq > 0)
group_repeats$concat <-
paste0(group_repeats$site,'-',group_repeats$TP)
# Find which of these group visits are to sites in years where the
# species of interest was observed
site_year <- group_repeats$concat %in% repeats$concat
temp <- group_repeats$Freq[site_year]
prop_repeats_grp <- sum(temp > 1) / length(temp) # NJBI THIS LOOKS WRONG
# NB this metric is restricted to just the site:year combos with records for this species
# all visits to a site in a year where there was at least one observance
# of species of interest
percentiles <- quantile(temp,c(.5,.7,.8,.9))
visits_median <- as.numeric(percentiles[1])
visits_P70 <- as.numeric(percentiles[2])
visits_P80 <- as.numeric(percentiles[3])
visits_P90 <- as.numeric(percentiles[4])
# proportion of years with data
prop_of_years <-
length(unique(tFD$TP))/(1+max(tFDall$TP)-min(tFDall$TP))
# lists of length 1
prop_list_one <- sum(tFD$L == 1) / nrow(tFD)
}
# proportion of records for the group which did not observe this species
prop_abs <- (nrow(tFDall)-nrow(tFD))/(nrow(tFDall))
# proportion of non-observances with list length > 1
tFDabs <- tFDall[!tFDall[,colnames(tFDall) %in% sp],]
prop_abs_list <- sum(tFDabs$L > 1) / nrow(tFDabs)
# Number of sites for taxonomic group
Totalnsites <- length(unique(tFDall$site))
# Number of visits for taxonomic group
Totalnvisits <- nrow(tFDall)
if(nrow(tFDall)==0){
total_years <- 0
TotalvisitPerYear <- 0
} else {
total_years <- length(min(tFDall$TP):max(tFDall$TP))
TotalvisitPerYear <- Totalnvisits/total_years
}
if(!verbose)
df <- data.frame(
median = median,
P90 = P90,
visits_median = visits_median,
visits_P90 = visits_P90,
prop_list_one = prop_list_one,
prop_repeats_grp = prop_repeats_grp,
prop_abs = prop_abs,
stringsAsFactors = FALSE
)
else
df <- data.frame(#species = sp,
avVisitPerYear = avVisitPerYear,
median = median,
P70 = P70,
P80 = P80,
P90 = P90,
sdVisitsPerYear = sdVisitsPerYear,
coeffVar = coeffVar,
zmedian = zmedian,
zP70 = zP70,
zP80 = zP80,
zP90 = zP90,
zsdVisitsPerYear = zsdVisitsPerYear,
zcoeffVar = zcoeffVar,
prop_repeats_spc = prop_repeats_spc,
prop_repeats_grp = prop_repeats_grp,
visits_P70 = visits_P70,
visits_P80 = visits_P80,
visits_P90 = visits_P90,
prop_of_years = prop_of_years,
prop_list_one = prop_list_one,
prop_abs = prop_abs,
prop_abs_list = prop_abs_list,
nyears = nyears,
Spnsite = Spnsite,
Spnvisits = Spnvisits,
SpvisitPerSite = SpvisitPerSite,
SpvisitPerYear = SpvisitPerYear,
Totalnsites = Totalnsites,
Totalnvisits = Totalnvisits,
TotalvisitPerYear = TotalvisitPerYear,
stringsAsFactors = FALSE)
return(df)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.