R/EndpointSummaryStats.R

Defines functions degraderate.variogram degraderate.snapshot calc.fun.div summarize.endpoint extract.endpoint

Documented in calc.fun.div degraderate.snapshot degraderate.variogram extract.endpoint summarize.endpoint

## Functions for summarizing characteristics of the community
## at the final timepoint of the experiment

## Possible improvements for the future: average over a time window
## rather than taking a snapshot


#' Extract the final timepoints from a set of NetLogo experiment runs
#'
#' Creates a new dataframe for easy analysis comparing multiple Pheno-Evo model
#' runs from a parameter sweep using NetLogo BehaviorSpace, by storing only the
#' end results of each model run. If the experiments in your BehaviorSpace run
#' lasted for differing lengths of time (e.g., if some populations went extinct
#' rapidly and those experiments ended prematurely), this function will figure
#' out what the last timepoint was that had a nonzero population, and extract
#' it.
#'
#' It is fairly important to make sure that the column names are standardized as
#' described in the Pheno-Evo tutorial.
#'
#' @param NLdata The full dataframe containing data from all BehaviorSpace
#'   experiments, imported from NetLogo.
#' @param run.nums A vector of the run numbers given to the experiments by
#'   BehaviorSpace.
#'
#' @return A new dataframe with all the data from just the final timepoints of
#'   all experiments.
#' @export
#'
#' @examples
#' data(PhenoEvoData_small)
#' PE.ends_small<-extract.endpoint(PhenoEvoData_small, c(2, 4, 6))
extract.endpoint<-function(NLdata, run.nums){
  ends1<-data.frame()
  for (runnum in run.nums){ # loop through each run
    finaltimepoint<-max((NLdata$step)[NLdata$count.turtles>0 & NLdata$run.number==runnum])
    finaltimepointdata<-NLdata[(NLdata$run.number==runnum & NLdata$step==finaltimepoint),]
    ends1<-rbind(ends1, finaltimepointdata)
  }

  return(ends1)
}




#' Take population average of single-cell traits
#'
#' In most Pheno-Evo experiments you will want to store the values of traits,
#' such as degrade rate, for every cell in the population at every timepoint;
#' however, in many cases it can be helpful to analyze just the population means
#' of those traits. This function calculates mean values of the traits you ask
#' it to, and appends those means to your existing endpoint dataframe.
#'
#' Dataframes from timepoints other than the endpoint are also fine, as long as
#' there is only 1 row per run number.
#'
#' @param ends.df Dataframe of endpoint data, generated by extract.endpoint().
#'
#' @param traitlist A vector of the names of the variables for which means
#'   should be calculated; names should match the corresponding column names,
#'   and each should be enclosed in quotation marks.
#'
#' @return A new dataframe consisting of the original input dataframe with
#'   appended columns, one for each trait averaged.
#' @export
#'
#' @examples
#' data(PhenoEvoData_small)
#' PE.ends_small<-extract.endpoint(PhenoEvoData_small, c(2, 4, 6))
#' pop.level.traits<-c('degrade.rate','switch.rate','response.error','generation')
#' PE.ends_small<-summarize.endpoint(PE.ends_small, pop.level.traits)
summarize.endpoint<-function(ends.df, traitlist){
  newcolnames<-c(colnames(ends.df), paste0('mean.',traitlist))
  for (trait in traitlist){
      trait.means<-as.numeric(lapply(ends.df[[trait]],
                                     function(x){mean(extractnumeric(x))}))
      ends.df<-cbind(ends.df, trait.means)
  }
  colnames(ends.df)<-newcolnames
  return(ends.df)
}



#' Calculate functional diversity of the phenotypic or genotypic variable of
#' your choice
#'
#' Implements the function [FD::dbFD()] from the package FD, for use
#' on Pheno-Evo data. It calculates functional richness (FRic), functional
#' evenness (FEve), functional divergence (FDiv), and functional dispersion
#' (FDis). Please read the [FD::dbFD()] documentation for a more
#' thorough explanation and references.
#'
#' Note that not all diversity metrics are calculable for all datasets. You may
#' get an error message warning you of metrics that can't be calculated for
#' yours. Dataframes from timepoints other than the endpoint are also fine, as
#' long as there is only 1 row per run number.
#'
#' @param ends.df Dataframe of endpoint data, generated by extract.endpoint().
#'   Must contain individual-level data for the trait in question.
#' @param trait Trait for which you want to calculate diversity. This must use
#'   same name as in column names of ends.df, and be enclosed in quotation
#'   marks.
#'
#' @return A new dataframe that provides the values of four diversity metrics
#'   for each run number.
#' @export
#'
#' @examples
#' data(PhenoEvoData_small)
#' PE.ends_small<-extract.endpoint(PhenoEvoData_small, c(2, 4, 6))
#' calc.fun.div(PE.ends_small, trait='degrade.rate')
calc.fun.div<-function(ends.df, trait){
  newdf<-data.frame(null=rep(1,100))
  for (runnum in 1:dim(ends.df)[1]){ # loop through each run
    drs<-parsenumeric(ends.df[[trait]][runnum]) # turn the values of that trait into a column of a dataframe
    colnames(drs)<-runnum
    newdf<-cbind(newdf, drs) # bind together columns from all runs into one dataframe
  }
  abunds<-t(newdf) # transpose so that experiment runs are rows
  traits<-data.frame(degraderate=seq(0.01, 1, 0.01))
  traitnames<-paste0('trait',seq(1,dim(abunds)[2],1))
  rownames(traits)<-traitnames # dr.traits defines the bins for the individual traits
  colnames(abunds)<-traitnames # dr.abunds expresses the proportion of the population in each trait bin
  # dr.traits and dr.abunds are the two dataframes you need to feed into the dbFD function
  fd1<-FD::dbFD(x=traits, a=abunds) # calculate functional diversity. fd1 is a functional diversity object
  # next, add the calculated values back to "ends" dataframe. remove the values from the "null" sample
  # in the future, may want to normalize by the value of the null sample. Not sure yet.
  ends.new<-data.frame(run.number=ends.df[,1]) # make a new dataframe to hold the calculated metrics
  ends.new$nbsp<-fd1$nbsp[-1]-1 # subtract the extra "species" counted because of the "1"s in the null column
  ends.new$FRic<-fd1$FRic[-1]
  ends.new$FEve<-fd1$FEve[-1]
  ends.new$FDis<-fd1$FDis[-1]
  return(ends.new)
  }





################################################################################
## spatial analysis


#' Replot values of degrade-rate in 2D space
#'
#' Generate a plot showing all the cells at their locations from one timepoint
#' of a Pheno-Evo experiment, color-coded by degrade rate, for viewing purposes.
#' This function is primarily to give the user a general idea of the
#' distribution of cells and degrade rate values at the timepoint of interest.
#'
#' Generally, that timepoint will be the final timepoint of the experiment,
#' unless the user manually generates a dataframe with data from a different
#' timepoint. Note that the color scale is dramatically different from the one
#' used in the Pheno-Evo model. This function uses [sp::spplot()] from
#' the SP package.
#'
#' @param ends.df Dataframe of endpoint data, generated by extract.endpoint().
#'   Must contain an xydr column.
#' @param run.num Run number to be plotted.
#'
#' @return A plot of the 2D model grid, with cells represented by squares in
#'   their original positions and color-coded by degrade.rate.
#' @export
#'
#' @examples
#' data(PE.ends)
#' degraderate.snapshot(PE.ends, run.num=4)
degraderate.snapshot<-function(ends.df, run.num){
  str1<-ends.df$xydr[ends.df$run.number==run.num] # pull out the character string from that dataframe cell
  df1<-extractXYdr(str1) # extract into a dataframe
  sp::coordinates(df1) = ~xcor+ycor # designate spatial coordinates
  sp::gridded(df1) = T # specify that it's gridded spatial data
  return(sp::spplot(df1['degrade.rate']))
}




#' Calculate the variogram for degrade.rate for a given experiment
#'
#' This function adapts Pheno-Evo data and then uses
#' [gstat::variogram()] from the gstat package to calculate spatial
#' correlation in the phenotypic degrade rate values of the cells. It returns a
#' dataframe. For a variogram plot, call plot() on the dataframe - see examples.
#'
#' Dataframes from timepoints other than the endpoint are also fine, as long as
#' there is only 1 row per run number.
#'
#' @param ends.df Dataframe of endpoint data, generated by extract.endpoint().
#'   Must contain an xydr column.
#' @param run.num Run number to be analyzed.
#'
#' @return An object of class gstatVariogram. See [gstat::variogram()]
#'   for documentation
#' @export
#'
#' @examples
#' data(PE.ends)
#' degraderate.variogram(PE.ends, 4)
#' plot(degraderate.variogram(PE.ends, 4))
degraderate.variogram<-function(ends.df, run.num){
  str1<-ends.df$xydr[ends.df$run.number==run.num] # pull out the character string from that dataframe cell
  df1<-extractXYdr(str1) # extract into a dataframe
  sp::coordinates(df1) = ~xcor+ycor # designate spatial coordinates
  sp::gridded(df1) = T # specify that it's gridded spatial data
  vgm1<-gstat::variogram(degrade.rate~1, df1)
  return(vgm1)
}
jessicaaudreylee/PhenoEvoR documentation built on Sept. 7, 2020, 3:50 a.m.