#' Frescalo trend analysis
#'
#' A function for using Frescalo (Hill, 2011), a tool for analysing occurrence data when
#' recording effort is not known. This function returns the output from Frescalo to the
#' R session and saves it to the path specified by \code{sinkdir}. By setting
#' \code{plot_fres} to \code{TRUE} maps of the results will also be saved. Plotting the
#' returned object gives a useful summary.
#'
#' @param Data A dataframe object. This should consist of rows of
#' observations and columns indicating the species and location as well as
#' either the year of the observation or columns specifying the start and end
#' dates of the observation. It is important that date columns are in
#' a date format.
#' @param frespath the path to the frescalo .exe file. This can be downloaded from
#' http://www.brc.ac.uk/biblio/frescalo-computer-program-analyse-your-biological-records.
#' It is currently only available for Windows. The directory where the .exe
#' is saved should be writeable.
#' @param time_periods A dataframe object with two columns. The first column contains the
#' start year of each time period and the second column contains the end year of
#' each time period. Time periods should not overlap.
#' @param site_col The name of the site column in \code{Data}
#' @param sp_col The name of the species column in \code{Data}
#' @param year_col The name of the year column in \code{Data}
#' @param start_col The name of the start date column in \code{Data}
#' @param end_col The name of the end date column in \code{Data}
#' @param species_to_include Optionally a character vector listing the names of species to be used.
#' Species not in your list are ignored. This is useful if you are only interested in a
#' subset of species.
#' @param sinkdir String giving the output directory for results
#' @param plot_fres Logical, if \code{TRUE} maps are produced by Frescalo. Default is
#' \code{FALSE}. CURRENTLY ONLY WORKS FOR UK GRID-REFERENCE DATA
#' @param Fres_weights 'LC*' specifies a weights files based on landcover data.
#' The suffix specifies the extend ('LCUK', 'LCNI' or 'LCGB'). 'VP' uses a weights
#' file based on vascular plant data for the UK
#' , both are included in the package. Alternativly a custom weights file can be
#' given as a data.frame. This must have three columns: target cell, neighbour cell,
#' weight. Default is 'LCGB'
#' @param non_benchmark_sp a character vector, giving the names of species not to be
#' used as benchmarks in Frescalo. Default is \code{NULL} and all species are used.
#' See Hill, 2011 for reasons why some species may not be suitable benchmarks.
#' @param fres_site_filter Optionally a character vector giving
#' the names of sites to be used in the trend analysis. Sites not include in this
#' list are not used for estimating TFactors. Default is \code{NULL} and all sites are
#' used.
#' @param phi Target frequency of frequency-weighted mean frequency. Default is 0.74 as in
#' Hill (2011). If set to \code{NULL}, phi will start at 0.74 and will be increased if
#' the value is smaller than the 98.5 percentile of input phi, limited to
#' to a maximum of 0.95.
#' @param alpha the proportion of the expected number of species in a cell to be treated as
#' benchmarks. Default is 0.27 as in Hill (2011). This is limited to 0.08 to 0.50.
#' @param trend_option Set the method by which you wish to calculate percentage change. This can currently
#' be set to either \code{'arithmetic'} (default) or \code{'geometric'}. Arimthmetic calculates
#' percentage change in a linear fashion such that a decline of 50\% over 50 years is
#' equal to 10\% in 10 years. Using the same example a Geometric trend would be 8.44\%
#' every 10 years as this work on a compound rate.
#' @param NYears The number of years over which you want the percentage change to be calculated (i.e.
#' 10 gives a decadal change). Default = 10
#' @param ignore.ireland Logical, if \code{TRUE} Irish hectads are removed. Default
#' is \code{FALSE}
#' @param ignore.channelislands Logical, if \code{TRUE} channel island hectads are
#' removed. Default is \code{FALSE}
#' @return Results are saved to file and most are returned in a list to R.
#'
#' The list object returned is comprised of the following:
#'
#' \item{\bold{$paths}}{This list of file paths provides the locations of the raw data files
#' for $log, $stat, $freq and $trend, in that order}
#'
#' \item{\bold{$trend}}{This dataframe provides the list of time factors for each species}
#'
#' \tabular{rll}{
#' - \tab \code{Species} \tab Name of species\cr
#' - \tab \code{Time} \tab Time period, specified as a class (e.g. 1970); times need not be numeric and are indexed as character strings\cr
#' - \tab \code{TFactor} \tab Time factor, the estimated relative frequency of species at the time\cr
#' - \tab \code{St_Dev} \tab Standard deviation of the time factor, given that spt (defined below) is a weighted sum of binomial variates\cr
#' - \tab \code{X} \tab Number of occurrences of species at the time period\cr
#' - \tab \code{Xspt} \tab Number of occurrences, given reduced weight of locations having very low sampling effort\cr
#' - \tab \code{Xest} \tab Estimated number of occurrences; this should be equal to spt if the algorithm has converged\cr
#' - \tab \code{N>0.00} \tab Number of locations with non-zero probability of the species occurring\cr
#' - \tab \code{N>0.98} \tab Number of locations for which the probability of occurrence was estimated as greater than 0.98\cr
#' }
#'
#' \item{\bold{$stat}}{Location report}
#'
#' \tabular{rll}{
#' - \tab \code{Location} \tab Name of location; in this case locations are hectads of the GB National Grid \cr
#' - \tab \code{Loc_no} \tab Numbering (added) of locations in alphanumeric order \cr
#' - \tab \code{No_spp} \tab Number of species at that location; the actual number which may be zero \cr
#' - \tab \code{Phi_in} \tab Initial value of phi, the frequency-weighted mean frequency \cr
#' - \tab \code{Alpha} \tab Sampling effort multiplier (to achieve standard value of phi) \cr
#' - \tab \code{Wgt_n2} \tab effective number N2 for the neighbourhood weights; this is small if there are few floristically similar hectads close to the target hectad. It is (sum weights)^2 / (sum weights^2) \cr
#' - \tab \code{Phi_out} \tab Value of phi after rescaling; constant, if the algorithm has converged\cr
#' - \tab \code{Spnum_in} \tab Sum of neighbourhood frequencies before rescaling\cr
#' - \tab \code{Spnum_out} \tab Estimated species richness, i.e. sum of neighbourhood frequencies after rescaling\cr
#' - \tab \code{Iter} \tab Number of iterations for algorithm to converge\cr
#' }
#'
#' \item{\bold{$freq}}{Listing of rescaled species frequencies}
#'
#' \tabular{rll}{
#' - \tab \code{Location} \tab Name of location\cr
#' - \tab \code{Species} \tab Name of species\cr
#' - \tab \code{Pres} \tab Record of species in location (1 = recorded, 0 = not recorded)\cr
#' - \tab \code{Freq} \tab Frequency of species in neighbourhood of location\cr
#' - \tab \code{Freq_1} \tab Estimated probabilty of occurrence, i.e. frequency of species after rescaling\cr
#' - \tab \code{SD_Frq1} \tab Standard error of Freq_1, calculated on the assumption that Freq is a binomial variate with standard error sqrt(Freq*(1-Freq)/ Wgt_n2), where Wgt_n2 is as defined for samples.txt in section (b)\cr
#' - \tab \code{Rank} \tab Rank of frequency in neighbourhood of location\cr
#' - \tab \code{Rank_1} \tab Rescaled rank, defined as Rank/Estimated species richness\cr
#' }
#'
#' \item{\bold{$log}}{This records all the output sent to the console when running frescalo}
#'
#' \item{\bold{$lm_stats}}{The results of linear modelling of TFactors}
#'
#' \tabular{rll}{
#' - \tab \code{SPECIES} \tab Name of species used internally by frescalo\cr
#' - \tab \code{NAME} \tab Name of species as appears in raw data\cr
#' - \tab \code{b} \tab The slope of the model\cr
#' - \tab \code{a} \tab The intercept\cr
#' - \tab \code{b_std_err} \tab Standard error of the slope\cr
#' - \tab \code{b_tval} \tab t-value for a test of significance of the slope\cr
#' - \tab \code{b_pval} \tab p-value for a test of significance of the slope\cr
#' - \tab \code{a_std_err} \tab Standard error of the intercept\cr
#' - \tab \code{a_tval} \tab t-value for a test of significance of the intercept\cr
#' - \tab \code{a_pval} \tab p-value for a test of significance of the intercept\cr
#' - \tab \code{adj_r2} \tab Rescaled rank, defined as Rank/Estimated species richness\cr
#' - \tab \code{r2} \tab t-value for a test of significance of the intercept\cr
#' - \tab \code{F_val} \tab F-value of the model\cr
#' - \tab \code{F_num_df} \tab Degrees of freedom of the model\cr
#' - \tab \code{F_den_df} \tab Denominator degrees of freedom from the F-statistic\cr
#' - \tab \code{Ymin} \tab The earliest year in the dataset\cr
#' - \tab \code{Ymax} \tab The latest year in the dataset\cr
#' - \tab \code{change_...} \tab The percentage change dependent on the values given to \code{trend_option} and \code{NYears}.\cr
#' }
#' \bold{The following columns are only produced when there are only two time periods}
#' \tabular{rll}{
#' - \tab \code{Z_VAL} \tab Z-value for the significance test of the trend\cr
#' - \tab \code{SIG_95} \tab A logical statement indicating if the trend is significant (TRUE) or non-significant (FALSE)\cr
#' }
#' @keywords trends, frescalo
#' @references Hill, Mark. Local frequency as a key to interpreting species occurrence data when
#' recording effort is not known. 2011. \emph{Methods in Ecology and Evolution}, 3 (1), 195-205.
#' @import lme4
#' @importFrom reshape2 dcast
#' @import sp
#' @import gdata
#' @import ggplot2
#' @export
#' @examples
#' \dontrun{
#' # Load data
#' data(unicorns)
#'
#' fres_out <- frescalo(Data = unicorns,
#' frespath = file.path(getwd(), "frescalo.exe"),
#' time_periods = data.frame(start=c(1980,1990),end=c(1989,1999)),
#' site_col = 'site',
#' sp_col = 'species',
#' start_col = 'start_date',
#' end_col = 'end_date')
#'}
frescalo <-
function(Data, #your Data as a dataframe object
frespath, #path to the exe
time_periods, #a list of vector pairs used in frescalo (ie 'c((1990,1995),(1996,2000))')
site_col, # name of site column
sp_col, # name od species column
year_col = NULL, # the name of your year column
start_col = NULL, # name of start date column
end_col = NULL, # name of end date column
species_to_include = NULL, #A species list with which to subset your data
sinkdir = NULL,#where is the data going to be saved
plot_fres = FALSE,#do you want to plot the maps and do linear regression for frescalo?
Fres_weights = 'LCGB',#the name of the weights file in the frescalo directory to be used
non_benchmark_sp = NULL,#species not to be used as benchmarks
fres_site_filter = NULL, #optional list of sites not to be included in analysis
phi = 0.74, #phi value for frescalo
alpha = 0.27, #alpha value for frescalo
trend_option = 'arithmetic',
NYears = 10,
ignore.ireland = F,#do you want to remove Irish hectads?
ignore.channelislands = F ##do you want to remove Channel Islands (they are not UK)?
){
# Perform error checks
errorChecks(year_col = year_col, site_col = site_col, sp_col = sp_col,
start_col = start_col, end_col = end_col, phi = phi, alpha = alpha,
non_benchmark_sp = non_benchmark_sp, time_periods = time_periods)
# ensure time_periods is ordered chronologically (this orders by the first column - start year)
time_periods <- time_periods[with(time_periods, order(time_periods[,1])),]
# ensure the end years are all greater than the start years
if(any(time_periods[,2] < time_periods[,1])) stop('In time_periods end years must be greater than or equal to start years')
# ensure time periods dont overlap
if(nrow(time_periods) > 1){
if(any(time_periods[2:nrow(time_periods),1] <
time_periods[1:(nrow(time_periods)-1),2])){
stop('In time_periods year ranges should not overlap')
}
}
# Do a range of checks and data formatting
Data <- frescalo_checks(site_col,sp_col,year_col,start_col,end_col, Data,
time_periods, ignore.ireland, ignore.channelislands,
species_to_include)
# Create the sink directory if given
if(!is.null(sinkdir)){
dir.create(sinkdir,showWarnings = FALSE)
} else {
warning(paste('sinkdir not given. Defaulted to working directory',getwd()))
sinkdir <- getwd()
}
# Load in the weights file and save it to the exe location
unpacked <- unpack_fres_weights(Fres_weights, frespath)
Fres_weights_name <- unpacked$Fres_weights_name
# check weights file and input data match (in terms of sites)
weights_data_matchup(weights_sites = unpacked$site_names,
data_sites = unique(Data$site))
# Setup output
datecode <- format(Sys.Date(),'%y%m%d')
fresoutput <- paste(sinkdir,'/','frescalo_',datecode,sep='')
# if the output directory already has data in from a frescalo run with the same date
# then create a new directory adding on an index number
if(file.exists(fresoutput)){
# Create new name
fresoutput <- create_index_filename(sinkdir, datecode, fresoutput)
dir.create(fresoutput,showWarnings = FALSE)
warning(paste('sinkdir already contains frescalo output. New data saved in ',
fresoutput, sep = ''))
} else {
dir.create(fresoutput,showWarnings = FALSE)
}
# Set up species names
spp_names <- NULL
# Create a lookup table of old and new names
new_names <- data.frame(SPECIES = paste('S', 1:length(unique(Data$CONCEPT)), sep = ''),
NAME = sort(unique(Data$CONCEPT)))
# Save this for reference
spp_names <- paste(fresoutput, '/species_names.csv', sep = '')
write.table(new_names, spp_names, sep = ',', row.names = FALSE)
# Merge in the new species names and use these going into frescalo
Data <- merge(Data, new_names, by.x = 'CONCEPT', by.y = 'NAME', all = T)
Data <- Data[c('site', 'SPECIES', 'yearnew')]
# Create non benchmark list if needed
non_bench_txt <- NULL
if(!is.null(non_benchmark_sp)){
# Set up path to text file
NonBenchPath <- paste(dirname(frespath), '/NonBench.txt', sep = '')
# Get the new names of these species
non_benchmark_sp <- new_names$SPECIES[new_names$NAME %in% non_benchmark_sp]
# This write.table works for both data.frames (with one column) and vectors
write.table(non_benchmark_sp, sep = '\n', file = NonBenchPath,
row.names = F, col.names = F, quote = F)
non_bench_txt <- 'NonBench.txt'
}
# If needed create a site filter
fres_site_txt <- NULL
if(!is.null(fres_site_filter)){
# Setup path for this file
fres_site_path <- paste(dirname(frespath), '/fres_site_filter.txt', sep = '')
write.table(fres_site_filter, sep = '\n', file = fres_site_path,
row.names = F, col.names = F, quote = F)
fres_site_txt <- 'fres_site_filter.txt'
}
if(nrow(Data) == 0) stop("By Zeus' beard! The data heading into frescalo has 0 rows. Make sure your time periods match your years, and your not subsetting out all your data")
fres_return <- run_fresc_file(fres_data = Data,
output_dir = fresoutput,
frescalo_path = frespath,
fres_f_wts = Fres_weights_name,
Plot = plot_fres,
spp_names_file = spp_names,
fres_f_nobench = non_bench_txt,
fres_f_filter = fres_site_txt,
fres_phi_val = phi,
fres_bench_val = alpha)
fres_lm_path <- paste(fresoutput, '/Maps_Results/Frescalo Tfactor lm stats.csv',
sep = '')
# Calculate z-values if only two time periods & lm_stats exists
if(length(time_periods[,1]) == 2 & 'lm_stats' %in% names(fres_return)){
fres_lm <- read.csv(fres_lm_path)
trendpath <- paste(fresoutput, '/Output/Trend.txt', sep = '')
zvalues <- fres_zvalues(trendpath)
lm_z <- merge(x = fres_lm, y = zvalues, by = 'SPECIES', all = TRUE)
lm_z <- lm_z[with(lm_z, order(NAME)),]
write.csv(lm_z, fres_lm_path, row.names = FALSE)
fres_return$lm_stats <- lm_z
}
# Calculate percentage change if more than two time periods & lm_stats exists
if(length(time_periods[,1]) > 2 & 'lm_stats' %in% names(fres_return)){
fres_return$lm_stats[paste('change_',NYears,'yr',sep='')]<-percentageChange(intercept = fres_return$lm_stats['a'],
slope = fres_return$lm_stats['b'],
Ymin = fres_return$lm_stats['Ymin'],
Ymax = fres_return$lm_stats['Ymax'],
NYears = NYears,
option = trend_option,
probability = FALSE)
write.csv(fres_return$lm_stats, fres_lm_path, row.names = FALSE)
}
# Remove .txt files created by frescalo. These have been converted to .csv
unlink(fres_return$paths[2:4])
# Update paths
fres_return$paths[2:4] <- gsub('.txt', '.csv', fres_return$paths[2:4])
print('frescalo complete')
return(fres_return)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.