R/PlotNetLogoData.R

Defines functions barcode.timecourse cell.abundance.timecourse switch.rate.timecourse response.error.timecourse degrade.rate.timecourse phenotype.histogram phenoevo.heatmap response.error.heatmap switch.rate.heatmap degrade.rate.heatmap generation.heatmap survival.heatmap

Documented in barcode.timecourse cell.abundance.timecourse degrade.rate.heatmap degrade.rate.timecourse generation.heatmap phenoevo.heatmap phenotype.histogram response.error.heatmap response.error.timecourse survival.heatmap switch.rate.heatmap switch.rate.timecourse

#### heat maps for comparing across models at final timepoints

#' Heatmap of population survival duration in NetLogo experiments
#'
#' Generates a plot with three dimensions of data: two experimental parameters
#' (for instance, toxin concentration and diffusion rate) as axes, and the
#' number of timepoints that each population survived before extinction given as
#' a color gradient from pale to dark red. It is a snapshot of several
#' experiments from a parameter sweep, at a single timepoint. For ease of
#' interpretation, the boxes in the heatmap can be labeled by run number.
#'
#' As with other endpoint-summary plots, this function can be used with data
#' from any timepoint, as long as the ends.df dataframe contains data from only
#' one timepoint per run number. For a function to create a heatmap with any
#' variable you wish (instead of population survival), see
#' [phenoevo.heatmap()].
#'
#' @param ends.df Dataframe of endpoint data, generated by extract.endpoint().
#' @param xvar Experimental parameter for plotting on x-axis. Must be a column
#'   name of ends.df, with no quotation marks.
#' @param yvar Experimental parameter for plotting on y-axis. Must be a column
#'   name of ends.df, with no quotation marks.
#' @param nums Should each box in the heatmap be labeled with its run number?
#'   T/F. Default: F
#'
#' @return A ggplot object using geom_tile. This can be modified in the typical
#'   ggplot way by adding layers, scales, etc.
#' @export
#'
#' @examples
#' data(PE.ends)
#' survival.heatmap(PE.ends, toxin.conc, env.noise, nums=TRUE)
survival.heatmap<-function(ends.df, xvar, yvar, nums=FALSE){
  xvar<-ggplot2::enquo(xvar)
  yvar<-ggplot2::enquo(yvar)
  ps<-ggplot2::ggplot(data=ends.df, ggplot2::aes(x=!!xvar, y=!!yvar, fill=step)) +
    ggplot2::geom_tile() + ggplot2::theme_bw() + ggplot2::ggtitle('Population survival duration') +
    ggplot2::scale_fill_gradient(low='#fff5f0', high='#67000d', name='')
  if (nums==TRUE){
    ps<-ps + ggplot2::geom_text(size=2, ggplot2::aes(label=run.number))
  }
  return(ps)
}


#' Heatmap showing average number of generations in many Pheno-Evo populations
#'
#' Each cell in the Pheno-Evo model carries a number indicating the number of
#' generations elapsed between the founding ancestor cells and its birth. At a
#' given timepoint, the average of the generation numbers of all cells in the
#' population can serve as a rough indicator of the population's overall growth
#' rate. [generation.heatmap()] shows three dimensions of data: two
#' experimental parameters (for instance, toxin concentration and diffusion
#' rate) as axes, and the population average generation number as color gradient
#' from pale yellow to dark green. It is a snapshot of several experiments from
#' a parameter sweep at a single timepoint.
#'
#' As with other endpoint-summary plots, this function can be used with data
#' from any timepoint, as long as the ends.df dataframe contains data from only
#' one timepoint per run number. For a function to create a heatmap with any
#' variable you wish (instead of generation number), see
#' [phenoevo.heatmap()].
#'
#' @param ends.df Dataframe of endpoint data, generated by extract.endpoint().
#' Must contain a generation.mean column (use [summarize.endpoint()])
#' @param xvar Experimental parameter for plotting on x-axis. Must be a column
#'   name of ends.df, with no quotation marks.
#' @param yvar Experimental parameter for plotting on y-axis. Must be a column
#'   name of ends.df, with no quotation marks.
#'
#' @return A ggplot object using geom_tile. This can be modified in the typical
#'   ggplot way by adding layers, scales, etc.
#' @export
#'
#' @examples
#' data(PE.ends)
#' generation.heatmap(PE.ends, toxin.conc, env.noise)
generation.heatmap<-function(ends.df, xvar, yvar){
  xvar<-ggplot2::enquo(xvar)
  yvar<-ggplot2::enquo(yvar)
  ps<-ggplot2::ggplot(data=ends.df, ggplot2::aes(x=!!xvar, y=!!yvar, fill=mean.generation)) +
    ggplot2::geom_tile() + ggplot2::theme_bw() + ggplot2::ggtitle('Mean number of generations') +
    ggplot2::scale_fill_gradient(low='#ffffe5', high='#004529', name='')
  return(ps)
}


#' Heatmap showing average phenotypic degrade rate in many Pheno-Evo populations
#'
#' This plot shows three dimensions of data: two experimental parameters (for
#' instance, toxin concentration and diffusion rate) as axes, and the population
#' average degrade rate as color gradient from light to dark blue. It is a
#' snapshot of several experiments from a parameter sweep, at a single
#' timepoint.
#'
#' As with other endpoint-summary plots, this function can be used with data
#' from any timepoint, as long as the ends.df dataframe contains data from only
#' one timepoint per run number. It is a snapshot of several experiments from a
#' parameter sweep, at a single timepoint. For a function to create a heatmap
#' with any variable you wish (instead of degrade rate), see
#' [phenoevo.heatmap()].
#'
#' @param ends.df Dataframe of endpoint data, generated by extract.endpoint().
#' Must contain a degrade.rate.mean column (use [summarize.endpoint()]
#' @param xvar Experimental parameter for plotting on x-axis. Must be a column
#'   name of ends.df, with no quotation marks.
#' @param yvar Experimental parameter for plotting on y-axis. Must be a column
#'   name of ends.df, with no quotation marks.
#'
#' @return A ggplot object using geom_tile. This can be modified in the typical
#'   ggplot way by adding layers, scales, etc.
#' @export
#'
#' @examples
#' data(PE.ends)
#' degrade.rate.heatmap(PE.ends, toxin.conc, env.noise)
degrade.rate.heatmap<-function(ends.df, xvar, yvar){
  xvar<-ggplot2::enquo(xvar)
  yvar<-ggplot2::enquo(yvar)
  pdr<-ggplot2::ggplot(data=ends.df, ggplot2::aes(x=!!xvar, y=!!yvar, fill=mean.degrade.rate)) +
    ggplot2::geom_tile() + ggplot2::theme_bw() + ggplot2::ggtitle('Degrade rate mean') +
    ggplot2::scale_fill_gradient(low='#f7fbff', high='#08306b', name='')
  return(pdr)
}


#' Heatmap showing average switch rate in many Pheno-Evo populations
#'
#' This plot shows three dimensions of data: two experimental parameters (for
#' instance, toxin concentration and diffusion rate) as axes, and the population
#' average switch rates as color gradient from beige to dark red-brown. It is a
#' snapshot of several experiments from a parameter sweep, at a single
#' timepoint.
#'
#' As with other endpoint-summary plots, this function can be used with data
#' from any timepoint, as long as the ends.df dataframe contains data from only
#' one timepoint per run number. For a function to create a heatmap with any
#' variable you wish (instead of switch rate), see
#' [phenoevo.heatmap()].
#'
#' @param ends.df Dataframe of endpoint data, generated by extract.endpoint().
#' Must contain a mean.switch.rate column (use [summarize.endpoint()]).
#' @param xvar Experimental parameter for plotting on x-axis. Must be a column
#'   name of ends.df, with no quotation marks.
#' @param yvar Experimental parameter for plotting on y-axis. Must be a column
#'   name of ends.df, with no quotation marks.
#'
#' @return A ggplot object using geom_tile. This can be modified in the typical
#'   ggplot way by adding layers, scales, etc.
#' @export
#'
#' @examples
#' data(PE.ends)
#' switch.rate.heatmap(PE.ends, toxin.conc, env.noise)
switch.rate.heatmap<-function(ends.df, xvar, yvar){
  xvar<-ggplot2::enquo(xvar)
  yvar<-ggplot2::enquo(yvar)
  psr<-ggplot2::ggplot(data=ends.df, ggplot2::aes(x=!!xvar, y=!!yvar, fill=mean.switch.rate)) +
    ggplot2::geom_tile() + ggplot2::theme_bw() + ggplot2::ggtitle('Switch rate mean') +
    ggplot2::scale_fill_gradient(low='#fff5eb', high='#7f2704', name='')
  return(psr)
}



#' Heatmap showing average response error in many Pheno-Evo populations
#'
#' This plot shows three dimensions of data: two experimental parameters (for
#' instance, toxin concentration and diffusion rate) as axes, and the population
#' average response errors as color gradient from light to dark purple. It is a
#' snapshot of several experiments from a parameter sweep, at a single
#' timepoint.
#'
#' As with other endpoint-summary plots, this function can be used with data
#' from any timepoint, as long as the ends.df dataframe contains data from only
#' one timepoint per run number. For a function to create a heatmap with any
#' variable you wish (instead of response error), see
#' [phenoevo.heatmap()].
#'
#' @param ends.df Dataframe of endpoint data, generated by extract.endpoint().
#' Must contain a mean.reponse.error column (use [summarize.endpoint()])
#' @param xvar Experimental parameter for plotting on x-axis. Must be a column
#'   name of ends.df, with no quotation marks.
#' @param yvar Experimental parameter for plotting on y-axis. Must be a column
#'   name of ends.df, with no quotation marks.
#'
#' @return A ggplot object using geom_tile. This can be modified in the typical
#'   ggplot way by adding layers, scales, etc.
#' @export
#'
#' @examples
#' data(PE.ends)
#' response.error.heatmap(PE.ends, toxin.conc, env.noise)
response.error.heatmap<-function(ends.df, xvar, yvar){
  xvar<-ggplot2::enquo(xvar)
  yvar<-ggplot2::enquo(yvar)
  pre<-ggplot2::ggplot(data=ends.df, ggplot2::aes(x=!!xvar, y=!!yvar, fill=mean.response.error)) +
    ggplot2::geom_tile() + ggplot2::theme_bw() + ggplot2::ggtitle('Response Error mean') +
    ggplot2::scale_fill_gradient(low='#fcfbfd', high='#3f007d', name='')
  return(pre)
}



#' Heatmap showing population average of custom variable in many Pheno-Evo
#' populations
#'
#' This plot shows three dimensions of data: two experimental parameters (for
#' instance, toxin concentration and diffusion rate) as axes, and the population
#' average of any trait of choice as color gradient from white to black. It is a
#' snapshot of several experiments from a parameter sweep, at a single
#' timepoint.
#'
#' To calculate the population mean for your trait of choice, use
#' [summarize.endpoint()] As with other endpoint-summary plots, this
#' function can be used with data from any timepoint, as long as the ends.df
#' dataframe contains data from only one timepoint per run number. For functions
#' designed for specific variables with pre-designated color schemes, see also
#' [degrade.rate.heatmap()], [switch.rate.heatmap()],
#' [response.error.heatmap()], [generation.heatmap()],
#' and [survival.heatmap()].
#'
#' @param ends.df Dataframe of endpoint data, generated by extract.endpoint().
#' @param xvar Experimental parameter for plotting on x-axis. Must be a column
#'   name of ends.df, with no quotation marks.
#' @param yvar Experimental parameter for plotting on y-axis. Must be a column
#'   name of ends.df, with no quotation marks.
#' @param gradientvar Variable to be expressed as color gradient in heat map
#'   fill. Must be the name of a column in ends.df containing population-mean
#'   data.
#'
#' @return A ggplot object using geom_tile. This can be modified in the typical
#'   ggplot way by adding layers, scales, etc.
#' @export
#'
#' @examples
#' data(PE.ends)
#' phenoevo.heatmap(PE.ends, toxin.conc, env.noise, mean.response.error)
phenoevo.heatmap<-function(ends.df, xvar, yvar, gradientvar){
  xvar<-ggplot2::enquo(xvar)
  yvar<-ggplot2::enquo(yvar)
  gradientvar<-ggplot2::enquo(gradientvar)
  peg<-ggplot2::ggplot(data=ends.df, ggplot2::aes(x=!!xvar, y=!!yvar, fill=!!gradientvar)) +
    ggplot2::geom_tile() + ggplot2::theme_bw() + ggplot2::ggtitle(gradientvar) +
    ggplot2::scale_fill_gradient(low='white', high='black', name='')
  return(peg)
}
######################################################################################


#' Plot all distributions of phenotypic degrade rates at the final timepoint
#'
#' For a dataset composed of multiple Pheno-Evo experiments in a parameter
#' sweep, [phenotype.histogram()] produces a multi-panel plot in
#' which each panel is a histogram showing the full distribution of degrade.rate
#' values in the population for one experiment at the final timepoint, and the
#' panels are arranged according to the two parameters of interest. Within each
#' panel, the x-axis axis is the degrade rate phenotype and the y-axis is the %
#' of population at that degrade rate.
#'
#' As with any of the endpoint functions, you can use this function on
#' timepoints other than the final timepoint, as long as the input dataframe has
#' only one row per run number.
#'
#' @param ends.df Dataframe of endpoint data, generated by extract.endpoint().
#' @param xvar Experimental parameter for plotting on the major x-axis, across
#'   panels. Must be a column name of ends.df, with no quotation marks.
#' @param yvar Experimental parameter for plotting on the major y-axis, across
#'   panels. Must be a column name of ends.df, with no quotation marks.
#' @param nums Should panels be labeled by run number? T/F
#'
#' @return A ggplot object using geom_line and facet_grid. This can be modified
#'   in the typical ggplot way by adding layers, scales, etc.
#' @export
#'
#' @examples
#' data(PE.ends)
#' phenotype.histogram(PE.ends, xvar=env.noise, yvar=toxin.conc, nums=TRUE)
phenotype.histogram<-function(ends.df, xvar, yvar, nums=FALSE){
  # first, generate dataframe with histogram data
  run.nums<-unique(ends.df$run.number)
  dr.df<-parsenumeric(ends.df$degrade.rate[run.nums[1]]) # pull out the listed data from within one cell into a separate dataframe
  for (runnum in run.nums[2:length(run.nums)]){ # loop to repeat for each run
    dr.df<-cbind(dr.df, parsenumeric(ends.df$degrade.rate[ends.df$run.number==runnum])) # bind them all together
  }
  colnames(dr.df)<-run.nums
  dr.df$degrade.rate<-seq(0.01, 1, 0.01) # add numbers by which to bin for degrade rate values
  dr.df<-reshape2::melt(dr.df, id.vars='degrade.rate', variable.name='run.number', value.name='abundance') # change table format
  xvar.e<-rlang::enexpr(xvar) # quote variables xvar and yvar
  yvar.e<-rlang::enexpr(yvar)
  summary.df<-unique(data.frame(ends.df[['run.number']], ends.df[[xvar.e]], ends.df[[yvar.e]])) # make a table with variables you care about
  colnames(summary.df)<-c('run.number', xvar.e, yvar.e)
  dr.df<-merge(dr.df, summary.df, by='run.number') # add that metadata table to the histogram data
  # then, plot panels
  hists<-ggplot2::ggplot(dr.df, ggplot2::aes(x=degrade.rate, y=abundance)) + ggplot2::geom_line() +
    ggplot2::facet_grid(rows=ggplot2::vars(!!yvar.e), cols=ggplot2::vars(!!xvar.e)) + ggplot2::theme_bw() +
    ggplot2::xlab(paste('Degrade rate (panels=', xvar.e, ')')) +
    ggplot2::ylab(paste('% of cells (panels=', yvar.e, ')'))
  if (nums==TRUE){
    hists<-hists + ggplot2::geom_text(x=0.25, y=75, ggplot2::aes(label=run.number))
  }
  return(hists)
}
# potential fixes still to implment:
# change order of panels so that 0,0 is in lower left (do this by defining factor levels)
# change format of numbers along x-axis for better readability


######################################################################################

## plots for timecourse data
# functionality to introduce later: ability to specify time window to plot


#' Plot population distribution of degrade.rate over time
#'
#' Generates a heatmap displaying how the distribution of phenotypic degrade rates in a population
#' changes over the course of a Pheno-Evo experiment. Time is displayed on the
#' x-axis, the range of possible degrade.rate values on the y-axis, and relative
#' abundance of a particular degrade.rate value given by color.
#'
#' To reduce plot size and complexity, if the input dataframe contains
#' more than 1,000 timepoints, only every 10th timepoint is plotted.
#' See also [switch.rate.timecourse()] and [response.error.timecourse()]}.
#'
#' @param NLdata The full dataframe containing data from all BehaviorSpace
#'   experiments at all timepoints, imported from NetLogo.
#' @param runnum The run number of the experiment of interest.
#'
#' @return A ggplot object using geom_tile. This can be modified in the typical
#'   ggplot way by adding layers, scales, etc.
#' @export
#'
#' @examples
#' data(PhenoEvoData_4)
#' degrade.rate.timecourse(PhenoEvoData_run4, 4)
degrade.rate.timecourse<-function(NLdata, runnum){
  df<-subset(NLdata, run.number==runnum)
  t3<-parsenumeric(df$degrade.rate[1])
  t3$s4<-as.numeric(t3$s4)
  if (dim(df)[1]>1000){
    for (tick in seq(10,dim(df)[1],10)){ # loop through every 10th timepoint
      t3<-cbind(t3, parsenumeric(df$degrade.rate[tick]))}
    colnames(t3)<-c(1, seq(10,dim(df)[1],10))
    w<-10
  }
  else {
    for (tick in seq(2,dim(df)[1],1)){ # take every timepoint
      t3<-cbind(t3, parsenumeric(df$degrade.rate[tick]))}
    colnames(t3)<-c(1, seq(2,dim(df)[1],1))
    w<-1
  }
  t3$degrade.rate<-seq(0.01, 1, 0.01)
  degraderates<-reshape2::melt(t3, id.vars='degrade.rate',
                     variable.name='timepoint', value.name='abundance')
  degraderates$timepoint<-as.numeric(as.character(degraderates$timepoint))
  p2<-ggplot2::ggplot(degraderates, ggplot2::aes(x=timepoint, y=degrade.rate, fill=abundance)) +
    ggplot2::geom_tile(width=w, height=0.01) +
    ggplot2::scale_fill_gradient(low='#f7fbff', high='#08306b', name='% of \npopulation') +
    ggplot2::ylab('Degradation rate') + ggplot2::xlab('Time (ticks)') + ggplot2::ggtitle(paste('Run #', runnum))
  return(p2)
}


#' Plot population distribution of response.error over time
#'
#' Generates a heatmap displaying how the genotypic response error changes over
#' the course of a Pheno-Evo experiment. Time is displayed on the x-axis, the
#' range of possible response error values on the y-axis, and relative abundance
#' given by color. To reduce plot size and complexity, if the input dataframe contains
#' more than 1,000 timepoints, only every 10th timepoint is plotted.
#'
#' See also [switch.rate.timecourse()] and [degrade.rate.timecourse()].
#'
#' @param NLdata The full dataframe containing data from all BehaviorSpace
#'   experiments at all timepoints, imported from NetLogo.
#' @param runnum The run number of the experiment of interest.
#'
#' @return A ggplot object using geom_tile. This can be modified in the typical
#'   ggplot way by adding layers, scales, etc.
#' @export
#'
#' @examples
#' data(PhenoEvoData_4)
#' response.error.timecourse(PhenoEvoData_run4, 4)
response.error.timecourse<-function(NLdata, runnum){
  df<-subset(NLdata, run.number==runnum)
  t3<-parsenumeric(df$response.error[1])
  t3$s4<-as.numeric(t3$s4)
  if (dim(df)[1]>1000){
    for (tick in seq(10,dim(df)[1],10)){ # loop through every 10th timepoint
      t3<-cbind(t3, parsenumeric(df$response.error[tick]))}
    colnames(t3)<-c(1, seq(10,dim(df)[1],10))
    w<-10
  }
  else {
    for (tick in seq(2,dim(df)[1],1)){ # take every timepoint
      t3<-cbind(t3, parsenumeric(df$response.error[tick]))}
    colnames(t3)<-c(1, seq(2,dim(df)[1],1))
    w<-1
  }
  t3$response.error<-seq(0.01, 1, 0.01)
  responserrors<-reshape2::melt(t3, id.vars='response.error',
                      variable.name='timepoint', value.name='abundance')
  responserrors$timepoint<-as.numeric(as.character(responserrors$timepoint))
  p2<-ggplot2::ggplot(responserrors, ggplot2::aes(x=timepoint, y=response.error, fill=abundance)) +
    ggplot2::geom_tile(width=w, height=0.01) +
    ggplot2::scale_fill_gradient(low='#fcfbfd', high='#3f007d', name='% of \npopulation') +
    ggplot2::ylab('Response error') + ggplot2::xlab('Time (ticks)') + ggplot2::ggtitle(paste('Run #', runnum))
  return(p2)
}




#' Plot population distribution of switch.rate over time
#'
#' Generates a heatmap displaying how the genotypic switch rate changes over the
#' course of a Pheno-Evo experiment. Time is displayed on the x-axis, the range
#' of possible switch rate values on the y-axis, and relative abundance given by
#' color. To reduce plot size and complexity, if the input dataframe contains
#' more than 1,000 timepoints, only every 10th timepoint is plotted.
#'
#' #' See also [degrade.rate.timecourse()] and [response.error.timecourse()].
#'
#' @param NLdata The full dataframe containing data from all BehaviorSpace
#'   experiments at all timepoints, imported from NetLogo.
#' @param runnum The run number of the experiment of interest.
#'
#' @return A ggplot object using geom_tile. This can be modified in the typical
#'   ggplot way by adding layers, scales, etc.
#' @export
#'
#' @examples
#' data(PhenoEvoData_4)
#' switch.rate.timecourse(PhenoEvoData_run4, 4)
switch.rate.timecourse<-function(NLdata, runnum){
  df<-subset(NLdata, run.number==runnum)
  t3<-parsenumeric(df$switch.rate[1])
  t3$s4<-as.numeric(t3$s4)
  if (dim(df)[1]>1000){
    for (tick in seq(10,dim(df)[1],10)){ # loop through every 10th timepoint
      t3<-cbind(t3, parsenumeric(df$switch.rate[tick]))}
    colnames(t3)<-c(1, seq(10,dim(df)[1],10))
    w<-10
  }
  else {
    for (tick in seq(2,dim(df)[1],1)){ # take every timepoint
      t3<-cbind(t3, parsenumeric(df$switch.rate[tick]))}
    colnames(t3)<-c(1, seq(2,dim(df)[1],1))
    w<-1
  }
  t3$switch.rate<-seq(0.01, 1, 0.01)
  switchrates<-reshape2::melt(t3, id.vars='switch.rate',
                    variable.name='timepoint', value.name='abundance')
  switchrates$timepoint<-as.numeric(as.character(switchrates$timepoint))
  p2<-ggplot2::ggplot(switchrates, ggplot2::aes(x=timepoint, y=switch.rate, fill=abundance)) +
    ggplot2::geom_tile(width=w, height=0.01) +
    ggplot2::scale_fill_gradient(low='#fff5eb', high='#7f2704', name='% of \npopulation') +
    ggplot2::ylab('Switch rate') + ggplot2::xlab('Time (ticks)') + ggplot2::ggtitle(paste('Run #', runnum))
  return(p2)
}




#' Plot total cell population and toxin concentration over time
#'
#' Produces a simple line plot showing the total population of cells in red, and
#' the average concentration of toxin per patch in black, over time.
#'
#' As this function is usually used for the purposes of displaying general
#' population behavior, results may be abridged: if the input dataframe has more
#' than 1,000 timepoints, only the final 500 timepoints are plotted. An attempt
#' is made to scale the y-axis of the cell abundance plot so that it can be
#' displayed on the same plot as the toxin plot.
#'
#' @param NLdata The full dataframe containing data from all BehaviorSpace
#'   experiments at all timepoints, imported from NetLogo.
#' @param runnum The run number of the experiment of interest.
#'
#' @return A ggplot object using geom_line. This can be modified in the typical
#'   ggplot way by adding layers, scales, etc.
#' @export
#'
#' @examples
#' data(PhenoEvoData_4)
#' cell.abundance.timecourse(PhenoEvoData_run4, 4)
cell.abundance.timecourse<-function(NLdata, runnum){
  df1<-subset(NLdata, run.number==runnum, select=c(step, mean.toxin, count.turtles))
  if (dim(df1)[1]>1000){ # if input dataframe has more than 1000 rows,
    df1<-df1[(dim(df1)[1]-500):(dim(df1)[1]),] # take just the last 500.
  }
  scaling<-ifelse(max(df1$mean.toxin)>0, # try to scale toxin and turtles so both are visible,
                  max(df1$count.turtles)/max(df1$mean.toxin), 1) # but only if toxin > 0
  p8<-ggplot2::ggplot(data=df1, ggplot2::aes(x=step)) + ggplot2::geom_line(ggplot2::aes(y=mean.toxin)) +
    ggplot2::geom_line(ggplot2::aes(y=count.turtles/scaling), color='red') +
    ggplot2::scale_y_continuous(sec.axis = ggplot2::sec_axis(~.*scaling, name='Number of Turtles')) +
    ggplot2::theme_bw() + ggplot2::xlab('Timepoint (ticks)') + ggplot2::ylab('Average toxin per patch') +
    ggplot2::ggtitle(paste('Run #', runnum))
  return(p8)
}


#' Plot abundance of barcode lineages over time.
#'
#' Generates a streamgraph plot displaying the abundance of each unique barcode
#' lineage as a proportion of the population, over time, in a Pheno-Evo
#' experiment. At the initiation of the experiment, each founding cell is given
#' a unique barcode. A barcode lineage consists of a founding cell and all its
#' descendants. Over time, especially with high population turnover, some
#' barcodes go extinct. This plot shows barcodes as different colors, with their
#' proportional abundance as their width along the y-axis, and time on the
#' x-axis.
#'
#' There is not a unique color for each barcode. This plot uses a 12-color
#' palette that is repeated several times to cover all the barcodes. However,
#' each lineage "stream" is bordered by a black line, so if two lineages that
#' happen to have the same color end up adjacent, they can be distinguished by
#' the border between them.
#'
#' @param NLdata The full dataframe containing data from all BehaviorSpace
#'   experiments at all timepoints, imported from NetLogo.
#' @param runnum The run number of the experiment of interest.
#'
#' @return A ggplot object using [ggTimeSeries::stat_steamgraph()].
#' @export
#'
#' @examples
#' data(PhenoEvoData_4)
#' barcode.timecourse(PhenoEvoData_run4, 4)
barcode.timecourse<-function(NLdata, runnum){
  df<-subset(NLdata, run.number==runnum)
  t1<-parsefactors.norm(df$barcode[1])[,c(1,3)] # make a base dataframe with relative abundance of each barcode
  if(dim(df)[1]>=1000){ # if dataframe has 1,000 rows or more,
    for (tick in seq(10,dim(df)[1],10)){ # take only every 10th timepoint
      t1<-suppressWarnings(merge(t1, parsefactors.norm(df$barcode[tick])[,c(1,3)],
                                 by='s3', all=T)) # suppress warnings about duplicated column names
      w<-10
    }
    colnames(t1)<-c('barcode', 1, seq(10,dim(df)[1],10))
  }
  if(dim(df)[1]<=999){ # if there are 999 rows or fewer, use all of them
    for (tick in seq(2,dim(df)[1]-1,1)){
      t1<-suppressWarnings(merge(t1, parsefactors.norm(df$barcode[tick])[,c(1,3)],
                                 by='s3', all=T)) # suppress warnings about duplicated column names
      w<-1
    }
    colnames(t1)<-c('barcode', 1, seq(2,dim(df)[1]-1,1))
  }
  barcodes<-reshape2::melt(t1, id.vars='barcode', na.rm=T, # restructure dataframe
                 variable.name='timepoint', value.name='abundance')
  barcodes$barcode<-factor(barcodes$barcode)
  barcodes$timepoint<-as.numeric(as.character(barcodes$timepoint))
  colors<-length(unique(barcodes$barcode))/12 + 1 # figure out how many times to repeat the 12-color colorscheme
  p4<-ggplot2::ggplot(barcodes, ggplot2::aes(x=w*timepoint, y=abundance, group=barcode, fill=barcode)) +
    ggTimeSeries::stat_steamgraph(color='black', size=0.1) +
    ggplot2::scale_fill_manual(values=rep(RColorBrewer::brewer.pal(12, 'Paired'), colors)) +
    ggplot2::theme_bw() + ggplot2::theme(legend.position='none') + ggplot2::ggtitle(paste('Run #', runnum)) +
    ggplot2::xlab('Timepoint (ticks)') + ggplot2::ylab('Barcode relative abundance')
  return(p4)
}



######################################################################################

# notes that helped with this code:
# see https://www.tidyverse.org/blog/2018/07/ggplot2-tidy-evaluation/
# for programmatic evaluation of aesthetic mappings
jessicaaudreylee/PhenoEvoR documentation built on Sept. 7, 2020, 3:50 a.m.