R/utils.R

Defines functions rank_list ragged_bin_by ragged_bin rackb rackl probcomb pmf_by_rank percent_cdf normalize get_cis1 get_cis fit_pmf draw_exp_decay distribute convo_ranks convo_rank convo_plus convo_minus convo_lis1 convo_list convo_lis convo_add convo cdf_dif build_mats boot_ids bin_by

Documented in bin_by boot_ids build_mats cdf_dif convo convo_add convo_lis convo_list convo_minus convo_plus convo_rank convo_ranks distribute draw_exp_decay fit_pmf get_cis normalize percent_cdf pmf_by_rank probcomb rackb rackl ragged_bin ragged_bin_by rank_list

# Functions for Deriving Inherited Age Distributions




#' bin by
#'
#' Divides a vector by length into bins, takes the sum of a corresponding variable.
#'
#' @param vec is the vector to divide by
#' @param var is the variable to sum
#' @param bins is the number of bins to divide into
#' @return a plot and dataframe of mean `var` per bin
#' @export

bin_by <- function(vec, var, bins = 10)  {
  mat <- matrix(c(vec,var), ncol = 2)
  mat <- mat[order(vec), ]
  vec_len <- length(vec)
  bin_len <- ceiling(vec_len / bins)
  vec_ids <- 1:length(vec)
  bin_list <- list()
  ranges <- vector(bins, mode = 'character')
  xs <- vector(bins, mode = 'numeric')
  ys <- vector(bins, mode = 'numeric')
  for (i in 1:bins)  {
    if (i == 1)  {
      bin_list[[i]] <- vec_ids[vec_ids <= bin_len]
      bin <- mat[bin_list[[i]], ]
      ranges[i] <- paste0(round(bin[1,1], 2),' to ', round(bin[nrow(bin), 1], 2))
      xs[i] <- mean(bin[,1])
      ys[i] <- sum(bin[,2])
    }

    if (i > 1)  {
      bin_list[[i]] <- vec_ids[vec_ids > (i-1) * bin_len &
                                 vec_ids <= i * bin_len]
      bin <- mat[bin_list[[i]], ]
      ranges[i] <- paste0(round(bin[1,1], 2),' to ', round(bin[nrow(bin), 1], 2))
      xs[i] <- mean(bin[,1])
      ys[i] <- sum(bin[,2])
    }
  }
  df <- data.frame(x = xs, y = ys, rng = ranges)
  return(df)
}

#' boot ids
#'
#' wrapper for lapply, given a rank list from a site, returns
#' `n` bootstraps with replacement of equal length as the rank list
#'
#' @param n is an integer specifying the number of bootstraps
#' @param rl is a rank list output from function `rank_list`
#' @return `n` bootstraps with replacement of length `rl`
#' @seealso rank_list
#' @export

boot_ids <- function(n, rl)  {
  mclapply(rl, function(a) lapply(seq_len(n), function(x,y) sort(sample(y,rep=T)), y = a))
}



#' build_mats condenses a sparse matrix down to a dense one.
#'
#' Given two vectors of equal length, returns a matrix where values of the first vector are nonzero.
#'
#' @param x is a numeric vector of probabilities (a pmf)
#' @param y is a numeric vector of values associated with x
#' @return a dense matrix (x,y) excluding nonzero probabilities in x
#'
#' @export


build_mats <- function(x,y)  {
  mat <- matrix(c(x,y),ncol=2)
  mat <- mat[mat[,1]>0,]
  mat
}


#' cdf_dif
#'
#' returns a vector of differences between increments of `cdf`
#'
#' @param cdf is a numeric vector increasing monotonically between zero and 1
#' @return a vector of differences between increments of `cdf`
#' @export
cdf_dif <- function(cdf) {
  dif <- 0
  for (i in seq_along(cdf)) {
    if (i == 1) {
      dif[i] <- cdf[i]
    }
    if (i > 1) {
      dif[i] <- cdf[i] - cdf[i-1]
    }
  }
  return(dif)
}


#' derived pmf by subtraction
#'
#' Given two pmfs and an age vector, returns the derived pmf of the convolution x-y.
#' Lengths of x,y and index must be equal.
#'
#' @param x is a numeric vector (older pmf)
#' @param y is a numeric vector (younger pmf)
#' @param index is a numeric vector (years)
#' @return a length(index) numeric vector of the convolved distribution of x-y
#' @seealso convo_minus
#'
#' @export


convo <- function(x,y,index)    {
  probcomb(convo_minus(build_mats(x,index),build_mats(y,index)),sort(index))
}



#' derived pmf by addition
#'
#' Given two pmfs and an age vector, returns the derived pmf of the convolution y+x.
#' Lengths of x,y and index must be equal.
#'
#' @param x is a numeric vector (younger pmf)
#' @param y is a numeric vector (older pmf)
#' @param index is a numeric vector (years)
#' @return a length(index) numeric vector of the convolved distribution of y+x
#' @seealso convo_plus
#'
#' @export


convo_add <- function(x,y,index)    {
  probcomb(convo_plus(build_mats(x,index),build_mats(y,index)),sort(index))
}


#' convolve list
#'
#' returns a list of convolved `pmfs` with ranks in `lis`
#'
#' @param lis is a list of integer ranks
#' @param pmfs is a matrix of pmfs with rownames(vals) (charcoal ages)
#' @return a list of convolved `pmfs` with ranks in `lis`
#' @export

convo_lis <- function(lis, pmfs = char_pmfs)  {
  lapply(lis, function(a,b) convo_ranks(a, b), b = pmfs)
}


#' convo list
#'
#' convolves pmfs by rank list.
#' added 50 years to index because sample age is set to 1950
#' but goes to 2000.
#'
#' @param ranks is a vector of ranks or list of rank vectors
#' @param pmfs is a matrix with cols obs and rows probs
#' @return convolved pmfs by subtraction
#' @export

convo_list <- function(ranks, pmfs = char_pmfs)  {
  index <- as.numeric(rownames(char_pmfs))
  rank_pmfs <- pmf_by_rank(ranks, pmfs)
  lapply(rank_pmfs, function(a)
    setDT(lapply(a, function(b, c) convo(unlist(b), unlist(c), index), c = a[,1])))
}


convo_lis1 <- function(lis, dat, vec)  {
  mclapply(seq_along(lis), function(a)
    mclapply(seq_along(a[[1]]), function(b)
      lapply(b, function(c)
        convo(dat[,lis[[a]][[b]][c]],dat[,lis[[a]][[b]][1]],vec))) %>% unlist) %>% setDT
}


#' derived pmf by subtraction
#'
#' Given two pmfs and a value vector, returns convolved pmf and value vector in matrix.
#' Values at associated with probs along (y) are subtracted from values associated with probs along (x)
#' throughout a discretized x,y event space.  Returns the set of prob:value pairs in a matrix
#' with cols prob, value ordered by value.
#'
#' @param x,y are sparse matrices with cols c(prob,value)
#' @return the set of prob:value pairs in a matrix with cols prob, value ordered by value.


convo_minus <- function(x,y)  {
  mat <- matrix(0,nrow=nrow(x)*nrow(y),ncol=2)
  mat[,1] <- mapply(function(a) mapply(function(b,c) b*c, b=a, c=y[,1]), a=x[,1])
  mat[,2] <- mapply(function(a) mapply(function(b,c) b-c, b=a, c=y[,2]), a=x[,2])
  mat[order(mat[,2]),]
}



#' convo_plus convolves two pmfs by adding their values.
#'
#' Given two pmfs and a value vector, returns convolved pmf and value vector in matrix.
#' Values at associated with probs along (x) are added to values associated with probs along (y)
#' throughout a discretized x,y event space.  Returns the set of prob:value pairs in a matrix
#' with cols prob, value ordered by value.
#'
#' @param x,y are sparse matrices with cols c(prob,value)
#' @return the set of prob:value pairs in a matrix with cols prob, value ordered by value.


convo_plus <- function(x,y)  {
  mat <- matrix(0,nrow=nrow(x)*nrow(y),ncol=2)
  mat[,1] <- mapply(function(a) mapply(function(b,c) b*c, b=a, c=y[,1]), a=x[,1])
  mat[,2] <- mapply(function(a) mapply(function(b,c) b+c, b=a, c=y[,2]), a=x[,2])
  mat[order(mat[,2]),]
}



#' convolve by rank
#'
#' Given a vector of ranks, pulls the pmfs for each rank and subtracts
#' the youngest from each via convolution.
#'
#' @param ranks is a vector of ranks (element of rank list)
#' @param pmfs is the matrix of charcoal pmfs
#' @param vals is the vector of values (years) associated with pmfs
#' @param dat is the charcoal variable data.table
#' @return pmfs of ranks convolved by subtracting youngest from each
convo_rank <- function(ranks,
                       pmfs = char_pmfs,
                       vals = as.numeric(colnames(pmfs)),
                       dat = charcoal)  {
  ranks <- sort(ranks)
  young <- pmfs[rownames(pmfs) == dat[rank == ranks[1], site_id],]
  ar <- array(0,c(ncol(pmfs), length(ranks)))
  for (i in seq_along(ranks))  {
    vec <- pmfs[rownames(pmfs) == dat[rank == ranks[i], site_id], ]
    ar[,i] <- convo(young, vec, vals)
  }
  ar
}



#' convolve vector of ranks
#'
#' subtracts the pmf with first rank in `vec` from pmfs with ranks in `vec`
#' via convolution
#'
#' @param vec is a vector of sorted numeric ranks
#' @param pmfs is a matrix of pmfs with cols of obs by rank and rows of probs
#' @return a matrix of pmfs convolved by subtracting pmf of lowest rank from pmfs
#'   with ranks in `vec`
#' @import magrittr
#' @export

convo_ranks <- function(vec, pmfs = char_pmfs)  {
  index <- as.numeric(rownames(pmfs))
  mclapply(seq_along(vec), function(a)
    mclapply(seq_along(a[[1]]), function(b)
      lapply(b, function(c)
        convo(pmfs[,vec[[a]][[b]][c]],pmfs[,vec[[a]][[b]][1]], index))) %>% unlist) %>% setDT
}



#' distribute
#'
#' given a pdf for a vector of values, returns a sample space of vals distributed
#' around the pdf
#'
#' @param pdf is a vector of probs summing to one
#' @param val is a vector of values associated with `pdf`
#' @param size is a large integer (>100)
#' @return sample of vals reflective of pdf
#' @export

distribute <- function(pdf, val, size=10000){
  vec <- 0
  for (i in seq_along(pdf)) {
    vec <- c(vec, replicate(round(size * pdf[i]), val[i]))
  }
  vec[-1]
}



#' Draw from exponential decay distribution
#'
#' Draw from exponential decay distribution $y = x^k$
#'
#' @param n is the max value returned by the decay function (integer)
#' @param k is the exponent $x^k$ (numeric)
#' @return \code{n} draws from the exponential decay distribution.
#' @export

draw_exp_decay <- function(n,k)  {
  xs <- 1:n
  ys <- (n - xs)^k
  unlist(mapply(function(x,y) rep(x,y), x = xs, y=ys))
}




#' Find the PMF of values vector.
#'
#' Given a numeric vector of values \code{vals}, and an index of values \code{index},
#' find the pmf of \code{vals}
#'
#' @param vals is a numeric vector of values
#' @param index is a numeric index of values
#' @return a numeric vector containing the pmf of \code{vals} along \code{index}
#' @export

fit_pmf <- function(vals,index)  {
  cdf <- to_cdf(vals)
  pmf <- to_pmf(cdf)
  mat <- matrix(c(pmf,vals),ncol=2)
  probcomb(mat,index)
}



#' get_cis returns confidence intervals of a vector
#'
#' Given a vector of probabilities (x), returns a numeric vector with
#' values at 2.5\%, median and 97.5\% of the distribution (x)
#'
#' @param x is a numeric vector of probabilities
#' @return a numeric vector with values at 2.5\%, median and 97.5\% of the distribution (x)
#' @export

get_cis <- function(x, lwr=.0250, med=.5000, upr=.9750)  {
  n <- length(x)
  id_lwr <- round(n * lwr)
  id_med <- round(n * med)
  id_upr <- round(n * upr)
  vec <- sort(x)
  cis <- vec[c(id_lwr, id_med, id_upr)]
  names(cis) <- c('lwr','med','upr')
  cis
}

get_cis1 <- function(x, lwr=.0250, med=.5000, upr=.9750)  {
  cis <- vector(length=3, mode='numeric')
  vec <- staTools::cdf(x)
  k <- 1
  while (vec$y[k]<lwr) k <- k+1
  cis[1] <- vec$x[k]
  while (vec$y[k]<med) k <- k+1
  cis[2] <- vec$x[k]
  while (vec$y[k]<=upr & k<length(vec$y)) k <- k+1
  cis[3] <- vec$x[k]
  names(cis) <- c('lwr','med','upr')
  cis
}




#' normalize
#'
#' normalize a vector of probabilities
#'
#' @param probs is a numeric vector of probabilities
#' @return normalized vector of `probs`
#' @export
normalize <- function(probs)  {
  tot <- sum(probs)
  for (i in seq_along(probs))  {
    probs[i] <- probs[i] / tot
  }
  probs
}


#' percent_cdf
#'
#' computes the cdf of `vec`
#' coerces to length `ln`
#'
#' @param vec is a numeric vector
#' @param ln is an integer
#' @return a cdf length `ln ` of `vec`
#' @export
percent_cdf <- function(vec, ln = 100) {
  vec <- emp_cdf(vec)[,2]
  n <- length(vec)
  cdf <- vector(length = ln, mode = 'numeric')
  for (i in 1:ln) {
    cdf[i] <- length(vec[vec <= i/ln]) / n
  }
  return(cdf)
}



#' pmf by rank
#'
#' wrapper for lapply, grabs pmfs with rank in `ranks` from main table
#'
#' @param ranks is a vector of ranks (element of rank list)
#' @param pmfs is the matrix of charcoal pmfs
#' @return the matrix of pmfs of rank `ranks`
#' @export

pmf_by_rank <- function(ranks, pmfs = char_pmfs)  {
  mat <- t(pmfs)
  lapply(ranks, function(a) data.table::as.data.table(t(mat[a,])))
}



#' Sums probability age value pairs into a single pmf
#'
#' Given a matrix of probability and age value pairs (x), and a vector of years (y),
#' returns a pmfs of length(y)
#'
#' @param x is a matrix of probability and age pairs
#' @param y is a numeric vector of years
#' @return a numeric vector of length(years) with value of sum probability age is less than
#'    year i in y and greater than year i-1
#'
#' @export


probcomb <- function(x,y)   {
  prob <- array(0,length(y))
  k <- 1
  n <- length(y)
  for (j in 1:nrow(x)){
    while (x[j,2] > y[k] & k < n)  k <- k + 1
    prob[k] <- prob[k] + x[j,1]
  }
  prob
}



#' rack list
#'
#' turn a list of data.tables of equal size into a matrix via cbind
#'
#' @param ls is a list of data.tables of equal size
#' @return data.tables in `ls` colbound into a matrix
#' @export
rackl <- function(ls)  {
  mat <- rack(ls[[1]])
  rows <- nrow(mat)
  cols <- ncol(mat)
  mat <- matrix(0, nrow = rows, ncol = cols * length(ls))
  for (i in seq_along(ls))  {
    mat[ ,((cols * i) - (cols - 1)):(cols * i)] <- rack(ls[[i]])
  }
  mat
}


#' rack boot
#'
#' turn a list of data.tables of equal size into a matrix via cbind
#'
#' @param ls is a list of data.tables of equal size
#' @return data.tables in `ls` colbound into a matrix
#' @export
rackb <- function(ls)  {
  rows <- length(ls[[1]])
  cols <- length(ls)
  mat <- matrix(0, nrow = rows, ncol = cols)
  for (i in seq_along(ls))  {
    mat[ , i] <- ls[[i]]
  }
  vec <- vector(length = rows, mode = 'numeric')
  for (i in seq_along(vec))  {
    vec[i] <- sum(mat[i,])/cols
  }
  vec
}


#' ragged_bin
#'
#' from indexes one to length `ln`
#' return a list of index numbers length `bins`
#' if `ln` is not equally divisible by `bins`
#' the last bin is smaller
#'
#' @param ln is an integer specifying length
#' @param bins is an integer specifying number of bins
#' @return a list length `bins` of index numbers
#' @export
ragged_bin <- function(ln, bins = 10) {
  if(length(ln) > 1) ln <- length(ln)
  bin_len <- ceiling(ln / bins)
  vec_ids <- 1:ln
  bin_list <- list()
  for (i in 1:bins)  {
    if (i == 1)  {
      bin_list[[i]] <- vec_ids[vec_ids <= bin_len]
    }

    if (i > 1)  {
      bin_list[[i]] <- vec_ids[vec_ids > (i-1) * bin_len &
                                 vec_ids <= i * bin_len]
    }
  }
  return(bin_list)
}


#' ragged_bin_by
#'
#' divides range of `by` into `bins` intervals
#' returns list length `bins`
#' elements of list are index numbers of `vec`
#' where values are in range of interval of `by`
#'
#' @param vec is a numeric vector of data
#' @param by is a range of values to bin by
#' @param bins is the number of bins to use
#' @return a list length `bins` of index numbers of `vec` divided along `by`
#' @export
ragged_bin_by <- function(vec, by = vec, bins = 10) {
  rng <- range(by)
  ids <- 1:length(vec)
  ls <- list()
  rng <- rng[1] + ((0:bins / bins) * (rng[2] - rng[1] / bins))
  for (i in 1:bins) {
    if (i == 1) {
      ls[[i]] <- ids[vec >= rng[i] & vec <= rng[i+1]]
    }
    if (i > 1 & i < bins) {
      ls[[i]] <- ids[vec > rng[i] & vec <= rng[i+1]]
    }
    if (i == bins) {
      ls[[i]] <- ids[vec > rng[i]]
    }
  }
  return(list(ls, rng))
}




#' Rank List
#'
#' Utility for returning sorted ranks of obs at sites above a minimum count.
#'
#' @param ftype is a character vector specifying facies type
#' @param dat is the data.table of charcoal site data
#' @param min_count is the minimum number of obs per site (1 is all sites)
#' @return a list of sorted ranks at sites with obs > `min_count`
#' @export

rank_list <- function(ftype, dat = charcoal, min_count = 3)  {
  dt <- dat[facies == ftype]
  dt_n <- dt[, .N]
  sites <- dt[, .N, keyby = .(family)]
  sites <- sites[N > min_count]
  lapply(unlist(sites[,1]), function(x) dt[family == x, rank])
}







#' sum_pmfs add pmfs together and returns a normalized sum
#'
#' Given a list of numeric pmfs (lis) and an integer representing the length
#' of pmfs in lis (len), returns a pmf representing the normalized sum of pmfs
#' in lis
#'
#' @param lis is a list of numeric pmfs
#' @param len is an integer representing the length of pmfs in lis
#' @return a summed and normalized pmf


sum_pmfs <- function(lis,len)  {
  mat <- matrix(unlist(lis), nrow = len)
  mat <- mapply(function(a,b,c) sum(b[a,]), a = seq_len(len), MoreArgs = list(b = mat))
  mat / sum(mat)
}


#' timer
#'
#' wrapper for expressions to measure time to compute
#'
#' @param exp is an expression
#' @param msg is a string allowing a customizable message for output
#' @return prints the time elapsed after expression computes with `msg`
#' @export

timer <- function(exp, msg = 'time elapsed: ') {
  begin <- Sys.time()
  exp
  end <- Sys.time()
  print(paste0(msg, end - begin))
}



#' to_cdf converts numeric pmf vector to cdf of equal length
#'
#' Given a numeric pmf, returns a numeric vector of the cdf.
#'
#'
#' @param pmf is a numeric vector
#' @return a numeric vector of the cdf
#'
#' @export

to_cdf <- function(pmf)  {
  cdf <- array(0,length(pmf))
  if (sum(pmf) != 1) pmf <- normalize(pmf)
  for (i in seq_along(pmf)) {
    if (i == 1) cdf[i] <- pmf[i]
    if (i > 1)  cdf[i] <- pmf[i] + cdf[i-1]
  }
  cdf
}



#' to_exceed converts numeric pmf vectors to log10 exceedance distributions of equal length
#'
#' Given a numeric pmf, returns a numeric vector of the log10 exceedance distribution
#'
#' @param pmf is a numeric vector of probabilities summing to one
#' @return a numeric vector of the log10 exceedance distribution
#' @seealso to_cdf
#' @export

to_exceed <- function(pmf)  {
  1 - to_cdf(pmf)
}


#' to_exceed converts numeric pmf vectors to log10 exceedance distributions of equal length
#'
#' Given a numeric pmf, returns a numeric vector of the log10 exceedance distribution
#'
#' @param vec is a numeric pmf
#' @return a numeric vector of the log10 exceedance distribution
#' @seealso to_cdf

to_lexc <- function(vec)  {
  log10(1 - to_cdf(vec))
}


#' to matrix
#'
#' from list of data.tables to matrix using cbind
#'
#' @param ls is a list of data.tables
#' @return a matrix merged by cbind
#' @export
to_mat <- function(ls)  {
  lapply(ls, rack) %>% rack
}



#' CONVERT VECTOR TO pmf
#'
#' Given the cdf as a numeric vector, return the derived pmf as a numeric vector of equal length.
#'
#' @param cdf is a numeric vector (a cdf)
#' @return the pmf derived from `cdf` as a numeric vector
#' @export

to_pmf <- function(cdf)  {
  pmf <- array(0,length(cdf))
  pmf[1] <- cdf[1]
  for (i in 2:length(cdf))  {
    pmf[i] <- cdf[i] - cdf[i-1]
  }
  pmf
}

#' tracker
#'
#' prints percent complete and time remaining estimate to console
#'
#' @param i is an integer specifying the index position
#' @param n is an integer specifying the number of interations in the loop
#' @param begin is a time object from a call to Sys.time()
#' @return prints percent complete and time remaining estimate to console
#' @export

tracker <- function(i, n, begin) {
  now <- Sys.time()
  dif <- now - begin
  pace <- dif / i
  rem <- (pace * n) - dif
  print(paste0('Percent Done: ', round(i/n*100, 2), '%'))
  print(paste0('Time Remaining: ', round(as.numeric(rem, units = 'hours'), 2), ' hours'))
}



#' truncate at zero
#'
#' truncate a pmf at index values below zero
#'
#' @param pmf is a numeric vector of probabilites
#' @param index is a numeric vector values associates with `pmf`
#' @return `pmf` with probs at vals below zero set to zero
#' @export
trunc_0 <- function(pmf, index)  {
  pmf[index < 0] <- 0
  pmf
}


#' trunc list
#'
#' truncate pmfs in list at values below zero and renormalize
#'
#' @param pmfs is a list of numeric vectors of probabilities that sum to 1
#' @param index is a numeric vector of values associated with probs in `pmfs`
#' @return truncated and renormalized pmfs in list
#' @export
trunc_list <- function(pmfs, index)  {
  lapply(pmfs, function(a) trunc_norm(unlist(a), index))
}



#' truncate & normalize
#'
#' truncates `pmf` at `index` values below zero and renormalizes
#'
#' @param pmf is a numeric vector of probabilities
#' @param index is a numeric vector of values associated with `pmf`
#' @return `pmf` truncated at `index` values below zero and renormalized
#' @export
trunc_norm <- function(pmf, index)  {
  normalize(trunc_0(pmf, index))
}



#' val_by_cdf
#'
#' values at position of cdf of `val`
#'
#' @param val is a numeric vector
#' @param ln is an integer
#' @return values of `val` at cdf of `val`
#' @export
val_by_cdf <- function(vals, ln = 100) {
  vals <- sort(vals)
  vec <- emp_cdf(vals)[,2]
  val <- 0
  for (i in 1:ln) {
    val[i] <- max(vals[vec <= i/ln])
  }
  return(val)
}


















#' Inherited age convolver
#'
#' By oversample site, draw a bootstrap sample with replacement (n) times,
#' convolve the sample distributions by subtracted the youngest from the set,
#' and return the 2.5\%, median, 97.5\% and observed distributions across all sites.
#'
#' @param dat is a data.table with rows of obs and cols of vars
#' @param ftype is a character vector matching facies class
#' @param n is an integer for number of bootstraps
#' @param t is a vector of years
#' @param probs is a matrix with row pmfs and cols of obs ordered by rank
#' @return matrix with row pmfs and cols for 2.5\%, median, 97.5\% and observed distributions
#'
#' @export
#' @import data.table
#' @import parallel
#' @importFrom magrittr %>%

boot_convo <- function(dat=sites, ftype, n, t=years, probs=npmf){
  # subset pmfs by facies type
  dt <- dat[facies == ftype]
  # number of obs
  dt_n <- dt[, .N]
  # number of obs by oss
  fc_n <- dt[, .N, keyby = .(family)]
  # rank list of ob rank by oss
  rl <- lapply(unlist(fc_n[,1]), function(x) dt[family == x, rank])
  #
  csl <- mclapply(seq_along(rl), function(a,b) convo_lis(b[[a]], probs, t), b=rl)
  csdt <- array(0,c(length(t),dt_n)) %>% as.data.table
  cism <- array(0,c(12,length(t)))
  csvec <- c(0,unlist(cumsum(fc_n[,2])))
  for (i in 1:length(csl))  {
    csdt[,(csvec[i]+1):csvec[i+1]] <- csl[[i]]
  }
  cism[4,] <- apply(csdt,1,function(x) sum(x)/ncol(csdt))
  cism[8,] <- cumsum(cism[4,])
  cism[12,] <- 1 - cism[8,]
  # id list of bootstrap samples of oss ranks by oss family, boot
  idl <- mclapply(rl, function(a) lapply(seq_len(n), function(x,y) sort(sample(y,rep=T)), y = a))

  cbl <- mclapply(seq_along(idl), function(a,b) convo_lis(b[[a]], probs, t), b=idl)
  dt <- array(0,c(length(t),length(cbl)*n)) %>% as.data.table
  for (i in 1:length(cbl))  {
    dt[,((i-1)*n+1):(i*n)] <- cbl[[i]]
  }
  cism[1:3,] <- apply(dt,1,get_cis)
  dt <- apply(dt,1,cumsum)
  bit <- apply(dt,2,get_cis)
  # dt <- apply(dt,1,function(a) 1-a)
  #  bat <- apply(dt,1,get_cis)
  csl
  #  cis <- mapply(function(a,b) get_cis(b[,.(a)]), a=seq_len(ncol(dt)), MoreArgs=list(b=dt))
  #  dt2 <- mapply(function(a,b) cumsum(b[,.(a)]/sum(b[,.(a)])), a=seq_len(ncol(dt)), MoreArgs=list(b=dt))
  #  cism[5:7,] <- mapply(function(a,b) get_cis(b[,.(a)]), a=seq_len(ncol(dt)), MoreArgs=list(b=dt))
  #  dt3 <- mapply(function(a,b) 1 - b[,.(a)], a=seq_len(ncol(dt2)), MoreArgs=list(b=dt2))
  #  cism[9:11,] <- mapply(function(a,b) get_cis(b[,.(a)]), a=seq_len(ncol(dt)), MoreArgs=list(b=dt))
  #  cis
}



#  DIFFER  #

differ <- function(vec,delta=0)	{

  #given a vector > length 2
  #return a variable of differences between elements

  for (i in 2:length(vec))	{
    delta[i] <- (vec[i] - vec[i-1])
  }
  return(delta)
}


# CUMULT #

cumult <- function(vec,dif,cum=0) {

  # given a vector > length 2
  # return a cumulative distribution curve

  for (i in 1:length(vec))	{
    cum[i] <- sum(vec[1:i]) / dif
  }
  return(cum)
}


#' Rack List
#'
#' Rack list with elements of equal length into a matrix using cbind
#'
#' @param ls is a list with elements of equal length
#' @return elements of `ls` column bound into a matrix
#' @export
rack <- function(ls)  {
  rows <- length(ls[[1]])
  cols <- length(ls)
  mat <- matrix(0, nrow = rows, ncol = cols)
  for (i in seq_along(ls))  {
    mat[ , i] <- ls[[i]]
  }
  mat
}


#' CIs of Boot
#'
#' Return confidence intervals from a matrix of pmf bootstraps.
#'
#' @param boot is a matrix of pmfs
#' @param pmfs is the reference matrix of pmfs (charcoal pmfs)
#' @return vector of cis of `boot`
#' @import magrittr
#' @import parallel
#' @export
cis_of_boot <- function(boot, pmfs = char_pmfs)  {
  index <- as.numeric(rownames(pmfs)) %>% sort
  trunc <- mclapply(boot, function(a,b)  trunc_norm(a, b), b = index) %>% setDT
  dt <- trunc %>% t %>% as.data.table
  ob_pmf <- trunc %>% rowSums %>% normalize
  cis <- mclapply(dt, get_cis) %>% rack %>% rbind(ob_pmf)
  rownames(cis) <- c('lwr','med','upr','obs')
  colnames(cis) <- index %>% as.character
  cis
}

id_by_rank <- function(ranks, dat = charcoal)  {
  lapply(ranks, function(a) dat[rank == a, site_id]) %>% unlist
}




log10.axis <- function(side, at, ...) {
  at.minor <- log10(outer(1:9, 10^(min(at):max(at))))
  lab <- sapply(at, function(i) as.expression(bquote(10^ .(i))))
  axis(side=side, at=at.minor, labels=NA, tcl=par('tcl')*0.5, ...)
  axis(side=side, at=at, labels=lab)
}
crumplecup/muddier documentation built on Aug. 18, 2021, 11:02 a.m.