bobfunctions2: Bob's utility functions

Documented in bin_2d box_blur buffer_range check_data_entry cubic_spline dinvgamma dist_gc dna_complement dna_to_aa fractal_noise func_header gamma_incomplete get_projection gg3d_add_grid_lines gg3d_scatterplot header layout_mat list_to_matrix lonlat_to_bearing moving_average object.size_auto perlin_noise pinvgamma plot_density project_2d project_2d_line qinvgamma rCRP rDPM rinvgamma set_compare setdiff_symmetric sim_lattice_biallelic sim_wrightfisher smooth_cols vec2mat

#------------------------------------------------
#' @title Produce script header text
#'
#' @description Produce header text to copy and paste into new script. Different
#'   header types are available for different types of script (see below).
#'
#' @param type switches between the following text blocks:
#'   \itemize{
#'     \item{type = 1: basic header including name, author, date and purpose}
#'     \item{type = 2: as above, plus list of useful commands and shortcuts when
#'     working with packages}
#'   }
#' @param copy_clipboard whether the header text should also be copied to the
#'   clipboard. This should be \code{FALSE} when running on PC as it can cause R
#'   to crash.
#'
#' @export

header <- function(type = 1, copy_clipboard = FALSE) {
  
  # check inputs
  assert_single_int(type)
  assert_in(type, 1:2)
  assert_single_logical(copy_clipboard)
  
  # make basic text list
  s <- list()
  s <- c(s, "")
  s <- c(s, "# .R")
  s <- c(s, "#")
  s <- c(s, "# Author: Bob Verity")
  s <- c(s, paste0("# Date: ", Sys.Date()))
  s <- c(s, "#")
  s <- c(s, "# Inputs: (none)")
  s <- c(s, "#")
  s <- c(s, "# Outputs: (none)")
  s <- c(s, "#")
  s <- c(s, "# Purpose:")
  s <- c(s, "# (this is an example header)")
  s <- c(s, "#")
  
  # type2 text
  if (type == 2) {
    s <- c(s, "")
    s <- c(s, "# RStudio shortcuts:")
    s <- c(s, "#    cmd+shift+L     : load package from local version")
    s <- c(s, "#    cmd+shift+D     : document (NB, option must be activated in Build Tools)")
    s <- c(s, "#    cmd+shift+E     : check")
    s <- c(s, "#    cmd+shift+T     : test")
    s <- c(s, "")
    s <- c(s, "# Useful commands:")
    s <- c(s, "# devtools::install()  # install package")
    s <- c(s, "# pkgdown::build_site() # build all pages of pkgdown website")
    s <- c(s, "# pkgdown::build_article('my-article')  # build single vignette")
    s <- c(s, "# check('.', args = '--no-examples')  # run checks without examples")
    s <- c(s, "# covr::report()    # interactive coverage report")
  }
  
  # further shared text
  s <- c(s, "# ------------------------------------------------------------------")
  
  # combine text
  ret <- paste(unlist(s), collapse = "\n")
  
  # copy to clipboard
  if (copy_clipboard) {
    clip <- pipe("pbcopy", "w")                       
    write(ret, file = clip)                               
    close(clip)
  }
  
  # print text to console
  cat(ret)
}

#------------------------------------------------
#' @title Produce function header text
#'
#' @description Produce header text for a single function to paste into script.
#'   Different header types are available for different types of function (see
#'   below). Header text is written in roxygen format.
#'
#' @param type switches between the following text blocks:
#'   \enumerate{
#'     \item basic function header
#'     \item complete function header with all the bells and whistles
#'     \item function that is not exported
#'     \item S3 method, e.g. \code{print.my_class()}
#'   }
#' @param copy_clipboard whether the header text should also be copied to the
#'   clipboard. This should be \code{FALSE} when running on PC as it can cause R
#'   to crash.
#'
#' @export

func_header <- function(type = 1, copy_clipboard = FALSE) {
  
  # check inputs
  assert_single_int(type)
  assert_in(type, 1:4)
  assert_single_logical(copy_clipboard)
  
  # make type1 text list
  s <- list()
  if (type == 1) {
    s <- c(s, "#------------------------------------------------")
    s <- c(s, "#' @title TODO - title")
    s <- c(s, "#'")
    s <- c(s, "#' @description TODO - description")
    s <- c(s, "#'")
    s <- c(s, "#' @details TODO - details")
    s <- c(s, "#'")
    s <- c(s, "#' @param x TODO - parameter description.")
    s <- c(s, "#'")
    s <- c(s, "#' @import ggplot2")
    s <- c(s, "#' @importFrom stats prcomp")
    s <- c(s, "#' @export")
  }
  
  # make type2 text list
  if (type == 2) {
    s <- c(s, "#------------------------------------------------")
    s <- c(s, "#' @title TODO - title")
    s <- c(s, "#'")
    s <- c(s, "#' @description TODO - description")
    s <- c(s, "#'")
    s <- c(s, "#' @details TODO - details.")
    s <- c(s, "#'")
    s <- c(s, "#' Character formatting:")
    s <- c(s, "#' \\emph{italics}, \\strong{bold}, \\code{function(argument = value)}, \\pkg{package_name}")
    s <- c(s, "#'")
    s <- c(s, "#' Mathematics:")
    s <- c(s, "#' \\eqn{a + b}: inline eqution, \\deqn{a + b}: display (block) equation")
    s <- c(s, "#'")
    s <- c(s, "#' Numbered list:")
    s <- c(s, "#'   \\enumerate{")
    s <- c(s, "#'     \\item First item")
    s <- c(s, "#'     \\item Second item")
    s <- c(s, "#'   }")
    s <- c(s, "#'")
    s <- c(s, "#' Bulleted list:")
    s <- c(s, "#'   \\itemize{")
    s <- c(s, "#'     \\item First item")
    s <- c(s, "#'     \\item Second item")
    s <- c(s, "#'   }")
    s <- c(s, "#'")
    s <- c(s, "#' Named list:")
    s <- c(s, "#'   \\describe{")
    s <- c(s, "#'     \\item{One}{First item}")
    s <- c(s, "#'     \\item{Two}{Second item}")
    s <- c(s, "#'   }")
    s <- c(s, "#'")
    s <- c(s, "#' Links to other documentation:")
    s <- c(s, "#' \\code{\\link{function}}: link to function in this package. \\code{\\link[MASS]{abbey}}: link to function in another package. \\link[=dest]{name}: link to dest, but show name. \\code{\\link[MASS:abbey]{name}}: link to function in another package, but show name. \\linkS4class{abc}: link to an S4 class.")
    s <- c(s, "#'")
    s <- c(s, "#' Links to the web:")
    s <- c(s, "#' \\url{http://rstudio.com}. \\href{http://rstudio.com}{Rstudio}. \\email{hadley@@rstudio.com} (note the doubled @)")
    s <- c(s, "#'")
    s <- c(s, "#' @param x TODO - parameter description.")
    s <- c(s, "#' @param y TODO - parameter description.")
    s <- c(s, "#'   \\itemize{")
    s <- c(s, "#'     \\item First item")
    s <- c(s, "#'     \\item Second item")
    s <- c(s, "#'   }")
    s <- c(s, "#'")
    s <- c(s, "#' @references TODO - references")
    s <- c(s, "#'")
    s <- c(s, "#' @import ggplot2")
    s <- c(s, "#' @importFrom stats prcomp")
    s <- c(s, "#' @export")
    s <- c(s, "#' @examples")
    s <- c(s, "#' # TODO - example")
  }
  
  if (type == 3) {
    s <- c(s, "#------------------------------------------------")
    s <- c(s, "# TODO - simple description (for code purposes only)")
    s <- c(s, "#' @noRd")
  }
  
  if (type == 4) {
    s <- c(s, "#------------------------------------------------")
    s <- c(s, "# TODO - simple description (for code purposes only)")
    s <- c(s, "#' @method r_function clustom_class")
    s <- c(s, "#' @export")
  }
  
  # combine text
  ret <- paste(unlist(s), collapse = "\n")
  
  # copy to clipboard
  if (copy_clipboard) {
    clip <- pipe("pbcopy", "w")                       
    write(ret, file = clip)                               
    close(clip)
  }
  
  # print text to console
  cat(ret)
}

#------------------------------------------------
#' @title Bin values in two dimensions
#'
#' @description Bin values in two dimensions based on a vector of breaks in each
#'   dimension. Values are included in a bin if they are >= the left break and <
#'   the right break. Default breaks are chosen to encompass all the data.
#'
#' @param x,y first and second dimensions of values to bin.
#' @param x_breaks,y_breaks set of breaks in x- and y-dimensions.
#'
#' @export

bin_2d <- function(x, y, x_breaks = NULL, y_breaks = NULL) {
  
  # check inputs
  assert_vector_numeric(x)
  assert_vector_numeric(y)
  assert_same_length(x, y)
  if (is.null(x_breaks)) {
    x_range <- buffer_range(min(x, na.rm = TRUE), max(x, na.rm = TRUE), buffer = 1.1)
    x_breaks <- seq(x_range[1], x_range[2], l = 101)
  }
  if (is.null(y_breaks)) {
    y_range <- buffer_range(min(y, na.rm = TRUE), max(y, na.rm = TRUE), buffer = 1.1)
    y_breaks <- seq(y_range[1], y_range[2], l = 101)
  }
  assert_vector_numeric(x_breaks)
  assert_vector_numeric(y_breaks)
  
  # get number of breaks in each dimension
  nx <- length(x_breaks)
  ny <- length(y_breaks)
  
  # create table of binned values
  tb <- table(findInterval(x, x_breaks), findInterval(y, y_breaks))
  
  # convert to dataframe and force numeric
  df <- as.data.frame(tb, stringsAsFactors = FALSE)
  names(df) <- c("x", "y", "count")
  df$x <- as.numeric(df$x)
  df$y <- as.numeric(df$y)
  
  # subset to within breaks range
  df <- subset(df, x > 0 & x < nx & y > 0 & y < ny)
  
  # fill in matrix
  z <- matrix(0, ny-1, nx-1)
  z[cbind(df$y, df$x)] <- df$count
  
  # return output as list
  ret <- list(x_mids = midpoints(x_breaks),
              y_mids = midpoints(y_breaks),
              z = z)
  return(ret)
}

#------------------------------------------------
#' @title Expand range to include buffer
#'
#' @description Expand range to include buffer. The buffer distance is defined
#'   as a proportion of the range.
#'
#' @param x_min,x_max minimum and maximum of starting range.
#' @param buffer the proportional increase in the starting range.
#'
#' @export

buffer_range <- function(x_min, x_max, buffer = 1.1) {
  
  # check inputs
  assert_single_numeric(x_min)
  assert_single_numeric(x_max)
  assert_gr(x_max, x_min)
  assert_single_pos(buffer, zero_allowed = FALSE)
  
  # create buffer range
  x_range <- (x_max - x_min)
  x_mid <- (x_max + x_min) / 2
  ret <- c(x_mid - (x_range / 2)*buffer, x_mid + (x_range / 2)*buffer)
  
  return(ret)
}

#------------------------------------------------
#' @title Calculate pairwise great circle distance between points
#'
#' @description Analogue of the \code{dist()} function, but calculating great
#'   circle distances (distance on the surface of a sphere). Points are input as
#'   longitude and latitude.
#'
#' @param x a two-column matrix or dataframe with longitude in the first column
#'   and latitude in the second.
#'
#' @export

dist_gc <- function(x) {
  
  # check inputs
  assert_ncol(x, 2)
  
  # calculate distance matrix
  ret <- apply(x, 1, function(y) {
    lonlat_to_bearing(x[,1], x[,2], y[1], y[2])$gc_dist
    })
  diag(ret) <- 0
  
  return(ret)
}

#------------------------------------------------
#' @title Calculate great circle distance and bearing between coordinates
#'
#' @description Calculate great circle distance and bearing between spatial
#'   coordinates.
#'
#' @param origin_lon,origin_lat the origin longitude and latitude.
#' @param dest_lon,dest_lat the destination longitude and latitude.
#' @param earth_rad the assumed radius of the earth (km).
#'
#' @export

lonlat_to_bearing <- function(origin_lon, origin_lat, dest_lon, dest_lat, earth_rad = 6371) {
  
  # check inputs
  assert_vector_numeric(origin_lon)
  assert_vector_numeric(origin_lat)
  assert_same_length(origin_lon, origin_lat)
  assert_vector_numeric(dest_lon)
  assert_vector_numeric(dest_lat)
  assert_same_length(dest_lon, dest_lat)
  assert_single_pos(earth_rad, zero_allowed = FALSE)
  
  # convert input arguments to radians
  origin_lon <- origin_lon*2*pi/360
  origin_lat <- origin_lat*2*pi/360
  dest_lon <- dest_lon*2*pi/360
  dest_lat <- dest_lat*2*pi/360
  
  delta_lon <- dest_lon - origin_lon
  
  # calculate bearing
  bearing <- atan2(sin(delta_lon)*cos(dest_lat),
                   cos(origin_lat)*sin(dest_lat) - sin(origin_lat)*cos(dest_lat)*cos(delta_lon))
  
  # calculate great circle angle. Use temporary variable to avoid acos(>1) or 
  # acos(<0), which can happen due to underflow issues
  tmp <- sin(origin_lat)*sin(dest_lat) + cos(origin_lat)*cos(dest_lat)*cos(delta_lon)
  tmp <- ifelse(tmp > 1, 1, tmp)
  tmp <- ifelse(tmp < 0, 0, tmp)
  gc_angle <- acos(tmp)
  
  # convert bearing from radians to degrees measured clockwise from due north,
  # and convert gc_angle to great circle distance via radius of earth (km)
  bearing <- bearing*360/(2*pi)
  bearing <- (bearing+360)%%360
  gc_dist <- earth_rad*gc_angle
  
  # return list
  ret <-list(bearing = bearing,
             gc_dist = gc_dist)
  return(ret)
}

#------------------------------------------------
#' @title Create layout matrix automatically from plot number
#'
#' @description Create a layout matrix that can be used with gridExtra to 
#'   arrange ggplot objects. The dimensions of the matrix are chosen 
#'   automatically from the number of plots to create a rectangular arrangement
#'   (portrait) of sufficient size.
#'
#' @param n number of plots in final layout
#'
#' @export

layout_mat <- function(n) {
  c <- ceiling(sqrt(n))
  if (c*(c-1) >= n) {
    ret <- matrix(1:(c*(c-1)), nrow = c, byrow = TRUE)
  } else {
    ret <- matrix(1:c^2, nrow = c, byrow = TRUE)
  }
  return(ret)
}

#------------------------------------------------
#' @title Get projection matrix
#'
#' @description Get projection matrix from \code{persp()} function without
#'   producing plot. The angles of projection are given by \code{theta}
#'   (rotation) and \code{phi} (elevation), and the strength of perspective
#'   transformation is given by \code{d}.
#'
#' @param x_lim,y_lim,z_lim limits of \code{persp()} plot.
#' @param theta angle of rotation (degrees).
#' @param phi angle of elevation (degrees).
#' @param d strength of perspective transformation.
#'
#' @importFrom grDevices dev.off jpeg
#' @importFrom graphics persp
#' @export

get_projection <- function(x_lim, y_lim, z_lim, theta, phi, d = 2) {
  
  # check inputs
  assert_limit(x_lim)
  assert_limit(y_lim)
  assert_limit(z_lim)
  assert_single_numeric(theta)
  assert_single_numeric(phi)
  assert_single_pos(d, zero_allowed = FALSE)
  
  # write perspective plot to temp file
  jpeg(tempfile())
  ret <- suppressWarnings(persp(matrix(0,2,2),
                                xlim = x_lim,
                                ylim = y_lim,
                                zlim = z_lim,
                                theta = theta,
                                phi = phi,
                                d = d))
  dev.off()
  
  return(ret)
}

#------------------------------------------------
#' @title Project 3D coordinates to 2D
#'
#' @description Use a projection matrix to transform 3D world coordinates to 2D
#'   coordinates - for example representing a camera viewing a 3D scene. See
#'   \code{get_projection()} for how to obtain a projection mtarix. This
#'   function is equivalent to the \code{trans3d()} function, but also stores
#'   depth information relative to the camera.
#'
#' @param x,y,z world coordinates.
#' @param proj_mat 4*4 projection matrix, as returned from
#'   \code{get_projection()}.
#'
#' @export

project_2d <- function(x, y, z, proj_mat) {
  tmp <- cbind(x,y,z,1) %*% proj_mat
  tmp <- sweep(tmp, 1, tmp[,4], "/")[, -4, drop = FALSE]
  ret <- as.data.frame(tmp)
  names(ret) <- c("x", "y", "depth")
  return(ret)
}

#------------------------------------------------
#' @title Project 3D line coordinates to 2D
#'
#' @description Use \code{project2D} to project start and end points from 3D
#'   world coordinates to 2D screen coordinates.
#'
#' @param x0,y0,z0 coordinates of start point.
#' @param x1,y1,z1 coordinates of end point.
#' @param proj_mat 4*4 projection matrix, as returned from
#'   \code{get_projection()}.
#'
#' @export

project_2d_line <- function(x0, y0, z0, x1, y1, z1, proj_mat) {
  
  # project start and end points
  point0 <- project_2d(x0, y0, z0, proj_mat)
  point1 <- project_2d(x1, y1, z1, proj_mat)
  
  # make dataframe
  ret <- data.frame(x0 = point0[,"x"], y0 = point0[,"y"], x1 = point1[,"x"], y1 = point1[,"y"])
  
  return(ret)
}

#------------------------------------------------
#' @title Add grid lines to ggplot perspective plot
#'
#' @description Add grid lines to a ggplot perspective plot.
#'
#' @details The vectors \code{x}, \code{y}, and \code{z} together define the
#'   locations and orientations of the lines. One of these vectors must be a
#'   series of breaks, one must be a limit (i.e. a vector of two values) and one
#'   must be a single value. For example, if \code{x = 1:5, y = c(-1,1), z = 3}
#'   then lines will be drawn parallel to the y-axis at x-values 1:5, spanning a
#'   range -1 to 1, and in the z-plane at position z = 3. If there are only two
#'   breaks then it becomes impossible to determine which axis represents breaks
#'   and which represents limits, hence the argument \code{break_axis} must be
#'   specified (otherwise this is chosen automatically).
#'
#' @param myplot an object of class \code{ggplot}.
#' @param x,y,z coordinates of lines. Values must be a sequence of breaks
#'   \emph{or} a pair of start-end values \emph{or} a single value (see
#'   details).
#' @param proj_mat 4*4 projection matrix, as returned from
#'   \code{get_projection()}.
#' @param col line colour.
#' @param size line size.
#' @param break_axis which axis to apply breaks over. If \code{NULL} then chosen
#'   automatically from other inputs, otherwise must be one of \code{"x"},
#'   \code{"y"} or \code{"z"} (see details).
#'
#' @export

gg3d_add_grid_lines <- function(myplot, x = seq(0,1,0.1), y = c(0,1), z = 0,
                                proj_mat, col = grey(0.8), size = 0.5, break_axis = NULL) {
  
  # check inputs
  assert_class(myplot, "ggplot")
  assert_vector_numeric(x)
  assert_increasing(x)
  assert_vector_numeric(y)
  assert_increasing(y)
  assert_vector_numeric(z)
  assert_increasing(z)
  assert_matrix_numeric(proj_mat)
  assert_dim(proj_mat, c(4,4))
  
  # attempt to set break_axis from other inputs
  arg_lengths <- mapply(length, list(x,y,z))
  if (is.null(break_axis)) {
    if (!any(arg_lengths > 2)) {
      stop("unable to set break_axis automatically due to ambiguous inputs. Need to set this argument manually to 'x', 'y' or 'z'")
    }
    break_axis <- c("x", "y", "z")[arg_lengths > 2]
  }
  assert_in(break_axis, c("x", "y", "z"))
  
  # work out which axis is planar, and which is orthogonal to the break axis
  planar_axis <- setdiff(c("x", "y", "z")[arg_lengths == 1], break_axis)
  orthog_axis <- setdiff(c("x", "y", "z"), c(break_axis, planar_axis))
  
  # check x,y,z lengths
  switch (planar_axis,
    "x" = assert_single_numeric(x),
    "y" = assert_single_numeric(y),
    "z" = assert_single_numeric(z)
  )
  switch (orthog_axis,
    "x" = assert_limit(x),
    "y" = assert_limit(y),
    "z" = assert_limit(z)
  )
  
  # calculate start and end points of lines in 2D projection
  switch (orthog_axis,
    "x" = plot_df <- project_2d_line(x[1], y, z, x[2], y, z, proj_mat),
    "y" = plot_df <- project_2d_line(x, y[1], z, x, y[2], z, proj_mat),
    "z" = plot_df <- project_2d_line(x, y, z[1], x, y, z[2], proj_mat)
  )  
  
  # add to ggplot object
  myplot <- myplot + geom_segment(aes_(x = ~x0, y = ~y0, xend = ~x1, yend = ~y1), col = col, size = size, data = plot_df)
  
  # return invisibly
  invisible(myplot)
}

#------------------------------------------------
#' @title Produce a 3D scatterplot in ggplot format
#'
#' @description Produce a 3D scatterplot in ggplot format.
#'
#' @param x,y,z vectors giving coordinates of points.
#' @param colour vector specifying point colours. Can be continuous or discrete.
#' @param size size of data points.
#' @param theta angle of rotation (degrees).
#' @param phi angle of elevation (degrees).
#' @param d strength of perspective transformation.
#' @param x_lim,y_lim,z_lim plotting limits.
#' @param x_grid,y_grid,z_grid sequences of breaks defining grid.
#' @param z_type switch between horizontal grid in the z-dimension (\code{z_type
#'   = 1}) and both horizontal and vertical grid (\code{z_type = 2}).
#' @param flip_grid_x,flip_grid_y if \code{TRUE} then the vertical grid in the
#'   x- or y-axis is moved to the other side of the plot.
#' @param grid_col the colour of grid lines.
#' @param grid_size the size of grid lines.
#' @param axis_on whether to draw axis lines.
#' @param axis_col the colour of axis lines.
#' @param axis_size the size of axis lines.
#' @param zero_line_on whether to draw lines at zero in every dimension.
#' @param zero_line_col the colour of zero lines.
#' @param zero_line_size the size of zero lines.
#' @param x_lab,y_lab,z_lab axis labels.
#' @param tick_length the absolute length of axis ticks.
#' @param axis_lab_size the size of axis labels.
#' @param axis_lab_dist the absolute distance of axis labels from the edge of
#'   the plotting region.
#'
#' @importFrom grDevices grey
#' @import ggplot2
#' @export

gg3d_scatterplot <- function(x, y, z, colour = 1, size = 0.5, theta = 135, phi = 30, d = 2,
                             x_lim = NULL, y_lim = NULL, z_lim = NULL, x_grid = NULL, y_grid = NULL,
                             z_grid = NULL, z_type = 2, flip_grid_x = FALSE, flip_grid_y = FALSE,
                             grid_col = grey(0.8), grid_size = 0.25, zero_line_on = TRUE,
                             zero_line_col = grey(0.8), zero_line_size = 0.6, axis_on = FALSE,
                             axis_col = "black", axis_size = 0.5, x_lab = "x", y_lab = "y", z_lab = "z",
                             tick_length = 0.2, axis_lab_size = 3, axis_lab_dist = 2) {
  
  # check inputs
  assert_vector_numeric(x)
  assert_vector_numeric(y)
  assert_vector_numeric(z)
  assert_single_pos(size)
  assert_single_numeric(theta)
  assert_single_numeric(phi)
  assert_single_pos(d)
  if (!is.null(x_lim)) {
    assert_limit(x_lim)
  }
  if (!is.null(y_lim)) {
    assert_limit(y_lim)
  }
  if (!is.null(z_lim)) {
    assert_limit(z_lim)
  }
  if (!is.null(x_grid)) {
    assert_vector_numeric(x_grid)
  }
  if (!is.null(y_grid)) {
    assert_vector_numeric(y_grid)
  }
  if (!is.null(z_grid)) {
    assert_vector_numeric(z_grid)
  }
  assert_in(z_type, 1:2)
  assert_single_logical(flip_grid_x)
  assert_single_logical(flip_grid_y)
  assert_single_pos(grid_size)
  assert_single_logical(axis_on)
  assert_single_pos(axis_size)
  assert_single_logical(zero_line_on)
  assert_single_pos(zero_line_size)
  assert_single_pos(tick_length)
  assert_single_pos(axis_lab_size)
  assert_single_pos(axis_lab_dist)
  
  # get pretty breaks in each dimension
  x_pretty <- pretty(x)
  y_pretty <- pretty(y)
  z_pretty <- pretty(z)
  
  # set default limits
  x_lim <- define_default(x_lim, range(x_pretty))
  y_lim <- define_default(y_lim, range(y_pretty))
  z_lim <- define_default(z_lim, range(z_pretty))
  
  # set default grid lines
  if (is.null(x_grid)) {
    delta_x <- x_pretty[2] - x_pretty[1]
    x_grid <- seq(floor(x_lim[1]/delta_x)*delta_x, ceiling(x_lim[2]/delta_x)*delta_x, delta_x)
    x_grid <- x_grid[x_grid >= x_lim[1] & x_grid <= x_lim[2]]
  }
  if (is.null(y_grid)) {
    delta_y <- y_pretty[2] - y_pretty[1]
    y_grid <- seq(floor(y_lim[1]/delta_y)*delta_y, ceiling(y_lim[2]/delta_y)*delta_y, delta_y)
    y_grid <- y_grid[y_grid >= y_lim[1] & y_grid <= y_lim[2]]
  }
  if (is.null(z_grid)) {
    delta_z <- z_pretty[2] - z_pretty[1]
    z_grid <- seq(floor(z_lim[1]/delta_z)*delta_z, ceiling(z_lim[2]/delta_z)*delta_z, delta_z)
    z_grid <- z_grid[z_grid >= z_lim[1] & z_grid <= z_lim[2]]
  }
  
  # get scaling factor in each dimension
  max_scale <- max(diff(x_lim), diff(y_lim), diff(z_lim))
  scale_factor <- c(diff(x_lim), diff(y_lim), diff(z_lim))/max_scale
  
  # get projection matrix
  proj_mat <- get_projection(x_lim = x_lim/scale_factor[1],
                             y_lim = y_lim/scale_factor[2],
                             z_lim = z_lim/scale_factor[3],
                             theta = theta,
                             phi = phi,
                             d = d)
  
  # produce basic plot
  plot1 <- ggplot() + theme(panel.grid.major = element_blank(),
                            panel.grid.minor = element_blank(),
                            panel.background = element_blank(),
                            axis.title.x = element_blank(),
                            axis.text.x = element_blank(),
                            axis.ticks.x = element_blank(),
                            axis.title.y = element_blank(),
                            axis.text.y = element_blank(),
                            axis.ticks.y = element_blank())
  
  # choose where to put vertical planes
  xv <- x_lim
  if (flip_grid_x) {
    xv <- rev(x_lim)
  }
  yv <- y_lim
  if (flip_grid_y) {
    yv <- rev(y_lim)
  }
  
  # z-plane gridlines
  plot1 <- gg3d_add_grid_lines(plot1, x = x_grid, y = y_lim, z = z_lim[1], proj_mat = proj_mat,
                               col = grid_col, size = grid_size, break_axis = "x")
  plot1 <- gg3d_add_grid_lines(plot1, x = x_lim, y = y_grid, z = z_lim[1], proj_mat = proj_mat,
                               col = grid_col, size = grid_size, break_axis = "y")
  
  # x-plane gridlines
  plot1 <- gg3d_add_grid_lines(plot1, x = x_lim, y = yv[1], z = z_grid, proj_mat = proj_mat,
                               col = grid_col, size = grid_size, break_axis = "z")
  if (z_type == 2) {
    plot1 <- gg3d_add_grid_lines(plot1, x = x_grid, y = yv[1], z = z_lim, proj_mat = proj_mat,
                                 col = grid_col, size = grid_size, break_axis = "x")
  }
  
  # y-plane gridlines
  plot1 <- gg3d_add_grid_lines(plot1, x = xv[1], y = y_lim, z = z_grid, proj_mat = proj_mat,
                               col = grid_col, size = grid_size, break_axis = "z")
  if (z_type == 2) {
    plot1 <- gg3d_add_grid_lines(plot1, x = xv[1], y = y_grid, z = z_lim, proj_mat = proj_mat,
                                 col = grid_col, size = grid_size, break_axis = "y")
  }
  
  # add zero lines
  if (zero_line_on) {
    plot1 <- gg3d_add_grid_lines(plot1, x = 0, y = y_lim, z = z_lim[1], proj_mat = proj_mat,
                                 col = zero_line_col, size = zero_line_size, break_axis = "x")
    plot1 <- gg3d_add_grid_lines(plot1, x = 0, y = yv[1], z = z_lim, proj_mat = proj_mat,
                                 col = zero_line_col, size = zero_line_size, break_axis = "y")
    plot1 <- gg3d_add_grid_lines(plot1, x = x_lim, y = 0, z = z_lim[1], proj_mat = proj_mat,
                                 col = zero_line_col, size = zero_line_size, break_axis = "y")
    plot1 <- gg3d_add_grid_lines(plot1, x = xv[1], y = 0, z = z_lim, proj_mat = proj_mat,
                                 col = zero_line_col, size = zero_line_size, break_axis = "x")
    plot1 <- gg3d_add_grid_lines(plot1, x = xv[1], y = y_lim, z = 0, proj_mat = proj_mat,
                                 col = zero_line_col, size = zero_line_size, break_axis = "z")
    plot1 <- gg3d_add_grid_lines(plot1, x = x_lim, y = yv[1], z = 0, proj_mat = proj_mat,
                                 col = zero_line_col, size = zero_line_size, break_axis = "z")
  }
  
  # add axis lines
  if (axis_on) {
    plot1 <- gg3d_add_grid_lines(plot1, x = xv[1], y = y_lim, z = z_lim[1], proj_mat = proj_mat,
                                 col = axis_col, size = axis_size, break_axis = "x")
    plot1 <- gg3d_add_grid_lines(plot1, x = x_lim, y = yv[1], z = z_lim[1], proj_mat = proj_mat,
                                 col = axis_col, size = axis_size, break_axis = "y")
    plot1 <- gg3d_add_grid_lines(plot1, x = xv[1], y = yv[1], z = z_lim, proj_mat = proj_mat,
                                 col = axis_col, size = axis_size, break_axis = "x")
  }
  
  # calculate tick lengths
  delta_x <- tick_length
  delta_y <- tick_length
  delta_z <- -tick_length
  
  # choose where to put ticks
  if (flip_grid_x) {
    delta_x <- -delta_x
  }
  if (flip_grid_y) {
    delta_y <- -delta_y
  }
  
  # calculate tick lines
  tick_x <- project_2d_line(x_grid, yv[2], z_lim[1], x_grid, yv[2] + delta_y, z_lim[1] + delta_z, proj_mat)
  tick_y <- project_2d_line(xv[2], y_grid, z_lim[1], xv[2] + delta_x, y_grid, z_lim[1] + delta_z, proj_mat)
  tick_z <- project_2d_line(xv[1], yv[2], z_grid, xv[1] - delta_x, yv[2] + delta_y, z_grid, proj_mat)
  
  # add ticks
  tick_all <- rbind(tick_x, tick_y, tick_z)
  plot1 <- plot1 + geom_segment(aes_(x = ~x0, y = ~y0, xend = ~x1, yend = ~y1),
                                col = grey(0.8), size = 0.25, data = tick_all)
  
  # calculate tick text positions
  tick_text_x <- project_2d(x_grid, yv[2] + delta_y*1.5, z_lim[1] + delta_z*1.5, proj_mat)
  tick_text_x$label <- x_grid
  tick_text_y <- project_2d(xv[2] + delta_x*1.5, y_grid, z_lim[1] + delta_z*1.5, proj_mat)
  tick_text_y$label <- y_grid
  tick_text_z <- project_2d(xv[1] - delta_x*1.5, yv[2] + delta_y*1.5, z_grid, proj_mat)
  tick_text_z$label <- z_grid
  
  # add tick text
  plot1 <- plot1 + geom_text(aes_(x = ~x, y = ~y, label = ~label), size = 2, data = tick_text_x)
  plot1 <- plot1 + geom_text(aes_(x = ~x, y = ~y, label = ~label), size = 2, data = tick_text_y)
  plot1 <- plot1 + geom_text(aes_(x = ~x, y = ~y, label = ~label), size = 2, data = tick_text_z)
  
  # calculate axis label positions
  axis_text_x <- project_2d(mean(x_lim), yv[2] + axis_lab_dist, z_lim[1], proj_mat)
  axis_text_x$label <- x_lab
  axis_text_y <- project_2d(xv[2] + axis_lab_dist, mean(y_lim), z_lim[1], proj_mat)
  axis_text_y$label <- y_lab
  axis_text_z <- project_2d(xv[1], yv[2] + axis_lab_dist, mean(z_lim), proj_mat)
  axis_text_z$label <- z_lab
  
  # add axis labels
  plot1 <- plot1 + geom_text(aes_(x = ~x, y = ~y, label = ~label), size = axis_lab_size, data = axis_text_x)
  plot1 <- plot1 + geom_text(aes_(x = ~x, y = ~y, label = ~label), size = axis_lab_size, data = axis_text_y)
  plot1 <- plot1 + geom_text(aes_(x = ~x, y = ~y, label = ~label), size = axis_lab_size, data = axis_text_z)
  
  # convert data coordinates
  data_df <- project_2d(x, y, z, proj_mat)
  data_df$col <- colour
  data_df <- data_df[order(data_df$depth, decreasing = FALSE),]
  
  # add data
  plot1 <- plot1 + geom_point(aes_(x = ~x, y = ~y, col = ~col), size = size, data = data_df)
  
  # return plotting object
  return(plot1)
}

#------------------------------------------------
#' @title Add inset plot to faceted ggplot
#'
#' @description Ordinarily ggplot is not setup to add inset plots (annotations)
#'   to faceted plots because it wants to add the same inset to every facet.
#'   This custom annotation function makes it possible to add an inset to a
#'   single facet. Credit goes to
#'   \href{https://stackoverflow.com/users/471093/baptiste}{baptiste} for
#'   \href{https://stackoverflow.com/questions/37867758/insetting-on-facet-grided-and-grid-arrangeed-plot}{this}
#'   solution.
#'
#' @param grob a ggplot object.
#' @param xmin,xmax,ymin,ymax coordinates at which the inset starts and ends.
#'   Set to \code{-Inf} to always start from far edge.
#' @param data (dataframe) should specify the value of the variable used to
#'   facet the original plot (see examples).
#'
#' @import ggplot2
#' @export
#' @examples
#' # produce main plot and inset plot
#' plot_df <- data.frame(x = rnorm(1e3), y = rnorm(1e3), group = 1:2)
#' plot_main <- ggplot2::ggplot() +
#'              ggplot2::geom_point(ggplot2::aes(x = x, y = y), data = plot_df) +
#'              ggplot2::facet_wrap(~group)
#' plot_inset <- ggplot2::qplot(rnorm(1e3))
#' 
#' # use gg_inset to annotate first panel only
#' plot_combined <- plot_main + gg_inset(ggplot2::ggplotGrob(plot_inset), data = data.frame(group = 1),
#'                                       xmin = -Inf, xmax = 1.5, ymin = 0.6, ymax = Inf)
#' 
#' plot_combined

gg_inset <- function (grob, xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf, data) {
  layer(data = data, stat = StatIdentity, position = PositionIdentity, 
        geom = ggplot2::GeomCustomAnn,
        inherit.aes = TRUE, params = list(grob = grob, 
                                          xmin = xmin, xmax = xmax, 
                                          ymin = ymin, ymax = ymax))
}

#------------------------------------------------
#' @title Convert DNA sequence to amino-acid
#'
#' @description Convert three-letter sequence of ATGC to corrseponding
#'   translated amino-adid sequence. Amino-acids can be reported in full name or
#'   single-letter code. Function is most efficient when using a vector of
#'   sequences.
#'
#' @param x three-letter character sequence of A, T, G and C.
#' @param output_format 1 for full name, 2 for single-letter code.
#'
#' @export

dna_to_aa <- function(x, output_format = 1) {
  
  # check inputs
  assert_vector_string(x)
  if (!all(mapply(nchar, x) == 3)) {
    stop("every element of x must be a three-letter character sequence")
  }
  assert_in(output_format, c(1, 2))
  
  # define codon patterns
  v1 <- c("TTT", "TTC", "TTA", "TTG", "TCT", "TCC", "TCA", "TCG", "TAT", 
          "TAC", "TAA", "TAG", "TGT", "TGC", "TGA", "TGG", "CTT", "CTC", 
          "CTA", "CTG", "CCT", "CCC", "CCA", "CCG", "CAT", "CAC", "CAA", 
          "CAG", "CGT", "CGC", "CGA", "CGG", "ATT", "ATC", "ATA", "ATG", 
          "ACT", "ACC", "ACA", "ACG", "AAT", "AAC", "AAA", "AAG", "AGT", 
          "AGC", "AGA", "AGG", "GTT", "GTC", "GTA", "GTG", "GCT", "GCC", 
          "GCA", "GCG", "GAT", "GAC", "GAA", "GAG", "GGT", "GGC", "GGA", 
          "GGG")
  
  # define translation
  if (output_format == 1) {
    v2 <- c("phe", "phe", "leu", "leu",
            "ser", "ser", "ser", "ser",
            "tyr", "tyr", "STOP", "STOP",
            "cys", "cys", "STOP", "trp",
            "leu", "leu", "leu", "leu",
            "pro", "pro", "pro", "pro",
            "his", "his", "gln", "gln",
            "arg", "arg", "arg", "arg",
            "ile", "ile", "ile", "met",
            "thr", "thr", "thr", "thr",
            "asn", "asn", "lys", "lys",
            "ser", "ser", "arg", "arg",
            "val", "val", "val", "val",
            "ala", "ala", "ala", "ala",
            "asp", "asp", "glu", "glu",
            "gly", "gly", "gly", "gly")
  } else {
    v2 <- c("F", "F", "L", "L",
            "S", "S", "S", "S",
            "Y", "Y", "*", "*",
            "C", "C", "*", "W",
            "L", "L", "L", "L",
            "P", "P", "P", "P",
            "H", "H", "Q", "Q",
            "R", "R", "R", "R",
            "I", "I", "I", "M",
            "T", "T", "T", "T",
            "N", "N", "K", "K",
            "S", "S", "R", "R",
            "V", "V", "V", "V",
            "A", "A", "A", "A",
            "D", "D", "E", "E",
            "G", "G", "G", "G")
  }
  
  # get return sequence
  ret <- v2[match(x, v1)]
  
  return(ret)
}

#------------------------------------------------
#' @title Return complementary DNA or RNA sequence
#'
#' @description Return complementary DNA or RNA sequence.
#'
#' @param x input DNA sequence as character string.
#' @param format_rna logical. If \code{TRUE} then A complements to U, rather than T.
#'
#' @export

dna_complement <- function(x, format_rna = FALSE) {
  ret <- gsub("G", "X", toupper(x))
  ret <- gsub("C", "G", ret)
  ret <- gsub("X", "C", ret)
  
  ret <- gsub("A", "X", ret)
  ret <- gsub("T", "A", ret)
  if (format_rna) {
    ret <- gsub("X", "U", ret)
  } else {
    ret <- gsub("X", "T", ret)
  }
  return(ret)
}

#------------------------------------------------
#' @title Simulate allele frequencies from Wright-Fisher model
#'
#' @description Simulate Wright-Fisher evolution in a series of partially
#'   connected demes.
#'
#' @details Assumes a haploid population and independent loci (no linkage
#'   disequilibrium). Implements a finite-alleles mutation model with equal
#'   chance of mutating from any allele to any other. Initialises allele
#'   frequencies by drawing from a symmetric Dirichlet(theta/k) distribution,
#'   where \eqn{theta = 2*N*mu}, which is the analytical equilibrium
#'   distribution between drift and finite-alleles mutation, ignoring migration
#'   between demes. Migration is implemented by proposing random swaps of
#'   individuals between demes, thereby ensuring population sizes remain
#'   constant over time. For this reason, \code{N} must be the same for all
#'   demes.
#'   
#' @param N number of individuals per deme. Must be the same for all
#'   demes.
#' @param L number of loci (assumed independent).
#' @param alleles number of alleles. Can be a single number for all loci or a
#'   vector of length \code{L}.
#' @param mu mutation rate. Assumes finite-alleles model, with equal chance of
#'   mutating from any allele to any other.
#' @param mig_mat migration matrix specifying the per-generation probability of
#'   an individual migrating from any deme (in rows) to any other deme (in
#'   columns).
#' @param t_out vector of times at which results will be output.
#' @param initial_method,initial_params method of initialising allele
#'   frequencies, and parameters that are used in initialisation. There are two
#'   possible options:
#'   \enumerate{
#'   \item Each deme has allele frequencies drawn independently from a symmetric
#'   Dirichlet(theta/k) distribution, where \eqn{theta = 2*N*mu} and k is the
#'   number of alleles. This is the analytical equilibrium distribution under
#'   the model if there was no migration between demes.
#'     \item All demes have the same initial allele frequencies, which are drawn
#'     once from a Dirichlet(alpha_1, ..., alpha_k) distribution, where the
#'     alpha parameters are input as \code{initial_params}. This can be a vector
#'     if the same number of alleles is used over all loci, or a list of vectors
#'     over loci to accommodate varying numbers of alleles.
#'   }
#' @param silent if \code{TRUE} then suppress output to console.
#'
#' @importFrom utils txtProgressBar
#' @export

sim_wrightfisher <- function(N, L, alleles, mu, mig_mat, t_out,
                             initial_method = 1, initial_params = NULL, silent = FALSE) {
  
  # check inputs
  assert_single_pos_int(N, zero_allowed = FALSE)
  assert_single_pos_int(L, zero_allowed = FALSE)
  assert_vector_pos_int(alleles, zero_allowed = FALSE)
  assert_gr(alleles, 1)
  if (length(alleles) > 1) {
    assert_length(alleles, L)
  }
  assert_single_bounded(mu)
  assert_symmetric_matrix(mig_mat)
  assert_bounded(mig_mat)
  if (!all(rowSums(mig_mat) == 1)) {
    stop("every row of mig_mat must sum to 1")
  }
  assert_vector_pos_int(t_out, zero_allowed = TRUE)
  assert_in(initial_method, c(1, 2))
  if (initial_method == 2) {
    if (length(alleles) == 1) {
      assert_vector(initial_params)
      assert_length(initial_params, alleles)
    } else {
      assert_list(initial_params)
      assert_length(initial_params, L)
      for (i in 1:L) {
        assert_length(initial_params[[i]], alleles[i])
      }
    }
  }
  assert_single_logical(silent)
  
  # process some inputs
  if (length(alleles) == 1) {
    alleles <- rep(alleles, L)
  }
  if (initial_method == 2) {
    if (!is.list(initial_params)) {
      initial_params = replicate(L, initial_params, simplify = FALSE)
    }
  }
  
  # get number of demes from dimensions of migration matrix
  K <- ncol(mig_mat)
  
  # make argument list
  args <- list(N = N,
               K = K,
               L = L,
               alleles = alleles,
               mu = mu,
               mig_mat = matrix_to_rcpp(mig_mat),
               t_out = t_out,
               initial_method = initial_method,
               initial_params = initial_params,
               silent = silent)
  
  # create progress bars
  pb <- txtProgressBar(min = 0, max = max(c(1, t_out)), initial = NA, style = 3)
  args_progress <- list(pb = pb)
  
  # functions to pass to C++
  args_functions <- list(update_progress = update_progress)
  
  # run efficient C++ function
  output_raw <- sim_wrightfisher_cpp(args, args_functions, args_progress)
  
  # process output
  output_processed <- mapply(function(t_i) {
    mapply(function(k) {
      data.frame(time = t_out[t_i],
                 deme = k,
                 locus = rep(seq_len(L), times = alleles),
                 allele = unlist(lapply(alleles, seq_len)),
                 count = unlist(output_raw$allele_counts[[t_i]][[k]]))
    }, seq_len(K), SIMPLIFY = FALSE)
  }, seq_along(t_out), SIMPLIFY = FALSE) %>%
    dplyr::bind_rows()
  
  return(output_processed)
}

#------------------------------------------------
#' @title rbind a list of matrices into a single matrix
#'
#' @description rbind a list of matrices into a single matrix. All matrices must
#'   have the same number of columns.
#'
#' @param l list of matrices.
#'
#' @export

list_to_matrix <- function(l) {
  
  # check inputs
  assert_list(l)
  
  # return if single element
  if (length(l) == 1) {
    return(l[[1]])
  }
  
  # check same ncol of all elements
  l_col <- mapply(ncol, l)
  if (any(l_col != l_col[1])) {
    stop("all matrices must have the same number of columns")
  }
  
  # rbind all matrices
  ret <- do.call(rbind, l)
  
  return(ret)
}

#------------------------------------------------
#' @title Print summary of differences and intersections of two sets
#'
#' @description Given two sets s1 and s2, print four values:
#'   \enumerate{
#'     \item the number of s1 not in s2
#'     \item the number of s1 in s2
#'     \item the number of s2 in s1
#'     \item the number of s2 not in s1
#'   }
#' 
#' The second and third values will be the same when there are no duplicates,
#' but otherwise could be different.
#'
#' @param s1 first set.
#' @param s2 second set.
#'
#' @export

set_compare <- function(s1, s2) {
  
  # check inputs
  assert_vector(s1)
  assert_vector(s2)
  
  # set operations
  v2 <- sum(s1 %in% s2)
  v1 <- length(s1) - v2
  v3 <- sum(s2 %in% s1)
  v4 <- length(s2) - v3
  ret <- c(set1_only = v1,
           set1_in_set2 = v2,
           set2_in_set1 = v3,
           set2_only = v4)
  
  return(ret)
}

#------------------------------------------------
#' @title Convert vector to matrix
#'
#' @description Given two input vectors, expands these vectors into matrices
#'   based on the dimensions of the other vector. Returns either the matrix in
#'   x, or the matrix in y.
#'
#' @param x,y two vectors of any type.
#' @param dim which dimension of the output matrix to return.
#'
#' @export

vec2mat <- function(x, y, dim) {
  
  if (dim == 1) {
    output <- matrix(rep(x, each = length(y)), length(y))
  } else {
    output <- matrix(rep(y, length(x)), length(y))
  }
  return(output)
}

#------------------------------------------------
#' @title Produce smooth colours from values
#'
#' @description Read in continuous values between xmin and xmax and return
#'   colours associated with these values, taken from a smoothly varying scale.
#'
#' @param x values from which to obtain colours.
#' @param xmin,xmax minimum and maximum values of x.
#' @param n_levels number of colours in palette.
#' @param raw_cols colours that make up the palette.
#'
#' @export

smooth_cols <- function(x,
                        xmin = min(x, na.rm = T),
                        xmax = max(x, na.rm = T),
                        n_levels = 1e3,
                        raw_cols = col_tim()) {
  
  # get x between 0 and 1
  x <- (x - xmin)/(xmax - xmin)
  
  # make smooth colours
  my_pal <- colorRampPalette(raw_cols)
  cols <- my_pal(n_levels+1)[floor(x*n_levels) + 1]
  
  return(cols)
}

#------------------------------------------------
#' @title Generate Perlin noise
#'
#' @description Generates 2D Perlin noise of any scale, in a matrix of any size. Credit to \href{https://stackoverflow.com/users/1129973/vincent-zoonekynd}{Vincent Zoonekynd} for \href{https://stackoverflow.com/questions/15387328/realistic-simulated-elevation-data-in-r-perlin-noise}{this answer}.
#'
#' @param out_rows,out_cols rows and columns in output matrix.
#' @param levels_x,levels_y bumpyness in x and y dimension.
#'
#' @export

perlin_noise = function(out_rows = 128,
                        out_cols = 128,
                        levels_x = 10,
                        levels_y = 10) {
  
  # check inputs
  assert_single_pos_int(out_rows, zero_allowed = FALSE)
  assert_single_pos_int(out_cols, zero_allowed = FALSE)
  assert_single_pos_int(levels_x, zero_allowed = FALSE)
  assert_greq(levels_x, 2)
  assert_single_pos_int(levels_y, zero_allowed = FALSE)
  assert_greq(levels_y, 2)
  
  # convert to more convenient names
  M = out_rows
  N = out_cols
  n = levels_x
  m = levels_y
  
  # for each point on this n*m grid, choose a unit 1 vector
  vector_field <- apply(array(rnorm(2*n*m), dim = c(2,n,m)), 2:3, function(u) u/sqrt(sum(u^2)))
  
  f <- function(x,y) {
    
    # find the grid cell in which the point (x,y) lies
    i <- floor(x)
    j <- floor(y)
    stopifnot( i >= 1 || j >= 1 || i < n || j < m )
    
    # the 4 vectors, from the vector field, at the vertices of the square
    v1 <- vector_field[,i,j]
    v2 <- vector_field[,i+1,j]
    v3 <- vector_field[,i,j+1]
    v4 <- vector_field[,i+1,j+1]
    
    # vectors from the point to the vertices
    u1 <- c(x,y) - c(i,j)
    u2 <- c(x,y) - c(i+1,j)
    u3 <- c(x,y) - c(i,j+1)
    u4 <- c(x,y) - c(i+1,j+1)
    
    # scalar products
    a1 <- sum( v1 * u1 )
    a2 <- sum( v2 * u2 )
    a3 <- sum( v3 * u3 )
    a4 <- sum( v4 * u4 )
    
    # weighted average of the scalar products
    s <- function(p) 3*p^2 - 2*p^3
    p <- s( x - i )
    q <- s( y - j )
    b1 <- (1-p)*a1 + p*a2
    b2 <- (1-p)*a3 + p*a4
    (1-q) * b1 + q * b2
  }
  
  xs <- seq(from = 1, to = n, length = N+1)[-(N+1)]
  ys <- seq(from = 1, to = m, length = M+1)[-(M+1)]
  outer( xs, ys, Vectorize(f) )
}

#------------------------------------------------
#' @title Generate fractal noise
#'
#' @description Generates 2D fractal noise by layering Perlin noise at different levels.
#'
#' @param out_rows,out_cols rows and columnds in output matrix.
#' @param a scale parameter. Values near zero place more weight on large
#'   features, a value of 1 places equal weight on all levels, and values > 1
#'   place more weight on small features.
#' @param n_levels number of levels of Perlin noise to layer.
#'
#' @importFrom stats rnorm
#' @export

fractal_noise = function(out_rows = 128,
                        out_cols = 128,
                        a = 0.6,
                        n_levels = 7) {
  
  # check inputs
  assert_single_pos_int(out_rows, zero_allowed = FALSE)
  assert_single_pos_int(out_cols, zero_allowed = FALSE)
  assert_single_pos(a, zero_allowed = FALSE)
  assert_single_pos_int(n_levels, zero_allowed = FALSE)
  
  # layer Perlin noise
  ret <- 0
  for (i in 1:n_levels) {
    ret <- ret + a^i*perlin_noise(out_rows, out_cols, 2^i, 2^i)
  }
  
  return(ret)
}

#------------------------------------------------
#' @title Apply box blur to a matrix
#'
#' @description Smooths a matrix of values by applying a box blur, in which each
#'   pixel of a matrix is replaced with the average value of pixels in a box
#'   around it.
#'
#' @param m a matrix of values to be blurred.
#' @param d the distance either side of each pixel that is searched when
#'   blurring
#'
#' @export

box_blur = function(m, d = 5) {
  
  # check inputs
  assert_matrix(m)
  assert_numeric(m)
  assert_single_pos_int(d, zero_allowed = FALSE)
  
  # run efficient C++ function
  ret <- box_blur_cpp(m, d)
  
  return(ret)
}

#------------------------------------------------
#' @title Return object size in auto units
#'
#' @description Return object size in auto units.
#'
#' @param x object.
#'
#' @importFrom utils object.size
#' @export

object.size_auto <- function(x) {
  format(object.size(x), units = "auto")
}

#------------------------------------------------
#' @title Calculate moving average over a window
#'
#' @description Calculate moving average of a sequence of values over a defined
#'   window that extends \code{d} units either side of the central value (hence
#'   \code{d=1} returns the original sequence). NA values are not counted in the
#'   numerator or denominator of the average. Where the window goes beyond the
#'   limits of the sequence, these values can included in the moving average
#'   (simply ignoring values outside the range) or returned as NA.
#'
#' @param x vector of numeric values.
#' @param d how far to look either side of central value. For example,
#'   \code{d=2} would search -2:2.
#' @param include_ends if \code{TRUE}, values where the window extends beyond
#'   the range of the sequence are included in the output. Otherwise return NA
#'   for these values.
#'
#' @export

moving_average <- function(x, d = 3, include_ends = TRUE) {
  
  # check inputs
  assert_vector_numeric(x)
  assert_single_pos_int(d, zero_allowed = TRUE)
  assert_single_logical(include_ends)
  
  # get start and end of range to explore
  n <- length(x)
  x_start <- (1:n) - d
  x_end <- (1:n) + d
  x_inner <- (x_start >= 1) & (x_end <= n)
  x_start[x_start < 1] <- 1
  x_end[x_end > n] <- n
  
  # get cumulative sums
  y <- ifelse(is.na(x), 0, x)
  cx <- cumsum(y)
  cn <- cumsum(!is.na(x))
  
  # calculate final numerator and denominator
  x_sum <- y[x_start] + cx[x_end] - cx[x_start]
  x_n <- (!is.na(x[x_start])) + cn[x_end] - cn[x_start]
  ret <- x_sum / x_n
  
  # drop ends if needed
  if (!include_ends) {
    ret[x_inner == FALSE] <- NA
  }
  
  return(ret)
}

#------------------------------------------------
#' @title Get cubic spline between coordinates
#'
#' @description Given a set of x,y coordinates, calculate the cubic spline that
#'   goes through all points. The solution is returned at a specified set of
#'   x-coordinates, which must be contained within the input x-coordinates. Both
#'   input and output x-coordinates must be increasing (i.e. the spline cannot
#'   double back).
#'
#' @param x,y coordinates of points
#' @param x_pred x-coordinates at which to calculate cubic spline.
#'
#' @export

cubic_spline <- function(x, y, x_pred) {
  
  # check inputs
  assert_vector_numeric(x)
  assert_increasing(x)
  assert_vector_numeric(y)
  assert_same_length(x, y)
  assert_vector_numeric(x_pred)
  assert_increasing(x_pred)
  assert_greq(min(x_pred), min(x))
  assert_leq(max(x_pred), max(x))
  
  # calculate spline coefficients
  n <- length(x) - 1
  c <- l <- mu <- z <- rep(0, n + 1)
  h <- b <- d <- alpha <- rep(NA, n)
  for (i in 1:n) {
    h[i] <- x[i+1] - x[i]
  }
  for (i in 2:n) {
    alpha[i] <- 3/h[i]*(y[i+1] - y[i]) - 3/h[i-1]*(y[i] - y[i-1])
  }
  l[1] <- 1
  for (i in 2:n) {
    l[i] <- 2*(x[i+1] - x[i-1]) - h[i-1]*mu[i-1]
    mu[i] <- h[i]/l[i]
    z[i] <- (alpha[i] - h[i-1]*z[i-1])/l[i]
  }
  l[n+1] <- 1
  for (i in n:1) {
    c[i] <- z[i] - mu[i]*c[i+1]
    b[i] <- (y[i+1] - y[i])/h[i] - h[i]*(c[i+1] + 2*c[i])/3
    d[i] <- (c[i+1] - c[i])/(3*h[i])
  }
  
  # make spline
  s <- rep(NA, length(x_pred))
  j <- 1
  for (i in seq_along(x_pred)) {
    while (x_pred[i] > x[j+1]) {
      j <- j + 1
    }
    s[i] <- y[j] + b[j]*(x_pred[i] - x[j]) + c[j]*(x_pred[i] - x[j])^2 + d[j]*(x_pred[i] - x[j])^3
  }
  
  return(s)
}

#------------------------------------------------
#' @title The Incomplete Gamma Function
#'
#' @description Returns the value of the incomplete gamma function, as defined
#'   \href{https://en.wikipedia.org/wiki/Incomplete_gamma_function#Definition}{here}.
#'   Accepts vector inputs.
#'
#' @param s shape parameter
#' @param x lower bound of integral
#'
#' @importFrom stats pgamma
#' @export

gamma_incomplete <- function(s, x) {
  
  # check inputs
  assert_numeric(s)
  assert_pos(x)
  
  # back-engineer incomplete gamma function from pgamma
  ret <- gamma(s) * pgamma(x, shape = s, rate = 1, lower.tail = FALSE)
  
  return(ret)
}

#------------------------------------------------
#' @name invgamma
#' @aliases dinvgamma
#' @aliases pinvgamma
#'
#' @title The Inverse-Gamma Distribution
#'
#' @description Density, distribution function, quantile function and random
#'   generation for the inverse-gamma distribution with shape \code{alpha} and
#'   scale \code{beta}.
#'
#' @param x vector of values or quantiles.
#' @param p vector of probabilities.
#' @param q vector of quantiles
#' @param n number of observations.
#' @param alpha shape parameter.
#' @param beta scale parameter.
#' @param log_on logical; if \code{TRUE} values are returned logged.
#' @param lower_tail logical; if \code{TRUE} (default) probabilities are
#'   \emph{P[X <= x]} otherwise \emph{P[X > x]}.
#'
NULL

#' @rdname invgamma
#' @export
dinvgamma <- function(x, alpha, beta, log_on = FALSE) {
  
  # check inputs
  assert_pos(x)
  assert_pos(alpha)
  assert_pos(beta)
  assert_single_logical(log_on)
  
  # calculate return value in log space
  ret <- alpha*log(beta) - lgamma(alpha) - (alpha + 1)*log(x) - beta/x
  
  # exponentiate as needed
  if (!log_on) {
    ret <- exp(ret)
  }
  
  return(ret)
}

#' @rdname invgamma
#' @importFrom stats pgamma
#' @export
pinvgamma <- function(q, alpha, beta, lower_tail = TRUE) {
  
  # check inputs
  assert_pos(q)
  assert_pos(alpha)
  assert_pos(beta)
  assert_single_logical(lower_tail)
  
  # calculate return value directly from gamma density
  ret <- pgamma(beta / q, shape = alpha, rate = 1, lower.tail = !lower_tail)
  
  return(ret)
}

#' @rdname invgamma
#' @importFrom stats qgamma
#' @export
qinvgamma <- function(p, alpha, beta, lower_tail = TRUE) {
  
  # check inputs
  assert_pos(p)
  assert_pos(alpha)
  assert_pos(beta)
  assert_single_logical(lower_tail)
  
  # calculate return value directly from gamma density
  ret <- beta / qgamma(p, shape = alpha, rate = 1, lower.tail = !lower_tail)
  
  return(ret)
}

#' @rdname invgamma
#' @export
rinvgamma <- function(n, alpha, beta) {
  
  # check inputs
  assert_single_pos_int(n, zero_allowed = TRUE)
  assert_single_pos(alpha)
  assert_single_pos(beta)
  
  # draw by transformation of gamma random variable
  ret <- 1 / rgamma(n, shape = alpha, rate = beta)
  
  return(ret)
}

#------------------------------------------------
#' @title Quick plot of density distribution
#'
#' @description Produces simple plot of probability density for the main
#'   families of distributions, including normal, lognormal, gamma and
#'   inverse-gamma.
#'
#' @param dist the name of the distribution family. For exapmle \code{"norm"}
#'   can be used to explore the \code{dnorm()} density.
#' @param qrange the quantile range over which to plot.
#' @param ... further parameters to the distribution function.
#'
#' @import ggplot2
#' @export

plot_density <- function(dist, qrange = c(0.01, 0.99), ...) {
  
  # check inputs
  assert_in(dist, c("norm", "lnorm", "gamma", "invgamma", "exp", "weibull"))
  assert_limit(qrange)
  assert_bounded(qrange, inclusive_left = FALSE, inclusive_right = FALSE)
  
  # avoid no visible binding error
  y <- NULL
  
  # get additional arguments into list
  arg_list <- list(...)
  
  # get limits that capture most of the distribution
  eval(parse(text = sprintf("quants <- do.call(q%s, append(arg_list, list(p = c(%s, %s))))",
                            dist, qrange[1], qrange[2])))
  
  # manually fix limits for some families of distribution
  if (dist %in% c("lnorm", "gamma", "invgamma")) {
    quants[1] <- 0
  }
  
  # calculate density over chosen range
  x <- seq(quants[1], quants[2], l = 1e3 + 1)
  eval(parse(text = sprintf("y <- do.call(d%s, append(arg_list, list(x = x)))", dist)))
  
  # produce plot
  plot1 <- ggplot2::ggplot(data.frame(x = x, y = y)) + ggplot2::theme_bw() +
    ggplot2::geom_area(ggplot2::aes_(x = ~x, y = ~y), fill = grey(0.5)) +
    ggplot2::xlab("x") + ggplot2::ylab("probability density") +
    ggplot2::scale_x_continuous(expand = expansion(mult = c(0, 0))) +
    ggplot2::scale_y_continuous(expand = expansion(mult = c(0, 0.1)))
  
  return(plot1)
}

#------------------------------------------------
#' @title Draw from Chinese restaurant process
#'
#' @description Draw a partition of n values from the Chinese restaurant process
#'   with concentration parameter \code{alpha}.
#'
#' @param n the number of random draws.
#' @param alpha the concentration parameter of the process.
#'
#' @export

rCRP <- function(n, alpha = 1) {
  
  # check inputs
  assert_single_pos_int(n, zero_allowed = FALSE)
  assert_single_pos(alpha)
  
  # draw values sequentially
  ret <- rep(NA, n)
  f <- alpha
  lf <- 1
  for (i in 1:n) {
    x <- sample.int(lf, 1, prob = f)
    if (x == lf) {
      f[x] <- 1
      f <- c(f, alpha)
      lf <- lf + 1
    } else {
      f[x] <- f[x] + 1
    }
    ret[i] <- x
  }
  
  return(ret)
}

#------------------------------------------------
#' @title Draw from Dirichlet process mixture model
#'
#' @description Draw a series of point from the d-dimensional Dirichlet process
#'   with normal prior and normal mixture components.
#'
#' @param n the number of random draws.
#' @param alpha the concentration parameter of the process.
#' @param d the number of dimensions.
#' @param tau the standard deviation of the symmetric normal prior on component
#'   means.
#' @param sigma the standard deviation of the symmetric normal mixture
#'   distribution. Equal for all components.
#'
#' @export

rDPM <- function(n, alpha = 1, d = 1, tau = 10, sigma = 1) {
  
  # check inputs
  assert_single_pos_int(n, zero_allowed = FALSE)
  assert_single_pos(alpha)
  assert_single_pos_int(d, zero_allowed = FALSE)
  assert_single_pos(tau)
  assert_single_pos(sigma)
  
  # draw group alloction from Chinese restaurant process
  c <- rCRP(n, alpha = alpha)
  k <- length(unique(c))
  
  # draw mixture component means
  mu <- matrix(rnorm(k*d, sd = tau), ncol = d)
  
  # draw values from mixture components
  x <- apply(mu[c,,drop = FALSE], 2, function(x) {
    rnorm(length(x), mean = x, sd = sigma)
  })
  
  # return as list
  ret <- list(group = c,
              ngroup = k,
              mu = mu,
              x = x)
  
  return(ret)
}

#------------------------------------------------
#' @title Check numerical series for basic data entry mistakes
#'
#' @description Compares two numeric vectors. For numbers that differ by a
#'   single digit, returns the "depth" of this digit from the end of the number
#'   (i.e. 1.234 vs. 1.334 would have a depth of 3). Identical numbers return 0,
#'   and numbers that differ by more than one digit return NA.
#'   
#' @details A clue that a data entry mistake has occurred is if two numbers are
#'   identical at all digits except for a single digit. This clue is stronger if
#'   the digit is towards the middle of the number, as the chance of two random
#'   numbers being identical at many subsequent digits is small. This function
#'   can be used to flag these values which can then be checked by hand more
#'   easily.
#'   
#' @param x1,x2 two numeric vecors.
#' @param nsmall number of digits to the right of decimal point that numbers are
#'   formatted to.
#'
#' @export

check_data_entry <- function(x1, x2, nsmall) {
  
  # check inputs
  assert_vector_numeric(x1)
  assert_vector_numeric(x2)
  assert_same_length(x1, x2)
  assert_single_pos_int(nsmall, zero_allowed = TRUE)
  
  # get vectors into characters with same number of digits
  n <- length(x1)
  x1_char <- format(x1, nsmall = nsmall, scientific = FALSE)
  x2_char <- format(x2, nsmall = nsmall, scientific = FALSE)
  
  # split into characters
  l1 <- strsplit(gsub("\\.", "", x1_char), "")
  l2 <- strsplit(gsub("\\.", "", x2_char), "")
  maxchar <- length(l1[[1]])
  
  # replace lists with NA in original series
  l1[is.na(x1)] <- NA
  l2[is.na(x2)] <- NA
  
  # find depth of differences
  ret <- mapply(function(i) {
    if (is.na(l1[[i]][1]) || is.na(l2[[i]][1])) {
      return(NA)
    }
    w <- which(l1[[i]] != l2[[i]])
    ret <- NA
    if (length(w) == 0) {
      ret <- 0
    } else if (length(w) == 1) {
      ret <- maxchar + 1 - w
    }
    ret
  }, seq_along(l1))
  
  ret
}

#------------------------------------------------
#' @title Find which elements are not shared between two sets
#'
#' @description The base \code{setdiff(vec1, vec2)} function is asymmetric in
#'   the sense that it returns elements of \code{vec1} that are not found within
#'   \code{vec2}, but it does not perform the reverse comparison. In contrast,
#'   \code{setdiff_symmetric(vec1, vec2)} is symmetric in that it returns any
#'   elements that are found in one vector but not the other.
#'   
#' @param x,y vectors (of the same mode) containing a sequence of items.
#'
#' @export

setdiff_symmetric <- function(x, y) {
  setdiff(union(x, y), intersect(x, y))
}

#------------------------------------------------
#' @title Simulate allele frequencies from lattice model assuming two allele
#'   classes
#'
#' @description Simulate evolution under the lattice model of Kimura and Weiss.
#'   Can be defined over a plane or over a torus. Outputs the raw count of the
#'   reference allele in each deme, replicated over all specified output times
#'   as a list of matrices.
#'
#' @details Assumes a haploid population of size \code{N} and a single,
#'   biallelic locus. Implements a finite-alleles mutation model with equal
#'   chance of mutating to each allele. Initialises allele frequencies at
#'   \code{p_init} in all demes. Under the lattice model demes can only exchange
#'   migrants with other demes immediately adjacent, and do so with probability
#'   \eqn{m/2} in both the x- and y-dimensions. This means the total probability
#'   of migrating is actually \eqn{2m} for each individual in a deme, which
#'   seems strange but is how the lattice model was described in the original
#'   papers of Kimura and Weiss. By default, boundaries are reflecting, meaning
#'   demes at the edges have half the migration probability of demes further in.
#'   This tends to cause edge effects due to the buildup of relatedness at the
#'   boundaries. The option exists to make boundaries periodic, in which case
#'   the lattice model is defined over a torus rather than a plane. This
#'   eliminates edge effects, but leads to complex correlations over long
#'   distances.
#'   
#' @param demes_x,demes_y number of demes in each dimension.
#' @param N number of individuals per deme. The same for all demes.
#' @param mu mutation rate. Assumes finite-alleles model, with equal chance of
#'   mutating to each allele.
#' @param m per-generation probability of migrating to an adjacent deme in the
#'   x- and y-dimensions. The total probability of leaving a given deme is
#'   therefore \eqn{2m}.
#' @param t_out vector of times at which results will be output.
#' @param p_init the initial allele frequency, which is the same in all demes.
#' @param torus if \code{TRUE} then model is defined over a torus, i.e. with
#'   period rather than reflecting boundaries. Defaults to \code{FALSE}.
#' @param silent if \code{TRUE} then suppress output to console.
#'
#' @importFrom utils txtProgressBar
#' @export

sim_lattice_biallelic <- function(demes_x, demes_y, N, mu, m, t_out, p_init = 0.0,
                                  torus = FALSE, silent = FALSE) {
  
  # check inputs
  assert_single_pos_int(demes_x, zero_allowed = FALSE)
  assert_single_pos_int(demes_y, zero_allowed = FALSE)
  assert_single_pos_int(N, zero_allowed = FALSE)
  assert_single_bounded(mu)
  assert_single_bounded(m, right = 0.5)
  assert_vector_pos_int(t_out, zero_allowed = TRUE)
  assert_single_bounded(p_init)
  assert_single_logical(torus)
  assert_single_logical(silent)
  
  # make argument list
  args <- list(demes_x = demes_x, demes_y = demes_y, N = N, mu = mu, m = m,
               t_out = t_out, p_init = p_init, torus = torus, silent = silent)
  
  # create progress bars
  pb <- txtProgressBar(min = 0, max = max(c(1, t_out)), initial = NA, style = 3)
  args_progress <- list(pb = pb)
  
  # functions to pass to C++
  args_functions <- list(update_progress = update_progress)
  
  # run efficient C++ function
  output_raw <- sim_lattice_biallelic_cpp(args, args_functions, args_progress)
  
  #return(output_raw)
  
  # process output into matrices
  output_processed <- list()
  for (i in seq_along(t_out)) {
    output_processed[[i]] <- output_raw[[i]] %>%
      unlist() %>%
      matrix(nrow = demes_x)
  }
  names(output_processed) <- sprintf("t%s", seq_along(t_out))
  
  return(output_processed)
}