R/peak_utils.R

Defines functions peakSummary lodPeaks

Documented in lodPeaks peakSummary

#' LOD score peaks
#'
#' Identify and summarise LOD score peaks.
#'
#' A peak is defined as a run of at least `width` consecutive markers with LOD
#' score above or equal to `threshold`. If possible, one flanking marker is
#' included on each side of the peak.
#'
#' @param x A [linkres] object, or data frame with columns `CHROM`, `MB`, `LOD`.
#' @param threshold A single number
#' @param width A positive integer
#' @param physmap A matrix or data frame with three columns: Marker
#'   name, chromosome and physical position.
#'
#' @return A list of data frames.
#'
#' @seealso [linkres], [lod()], [merlinLod()]
#'
#' @examples
#'
#' # Use built-in dataset `dominant1`
#' lods = lod(x = dominant1$ped,
#'            aff = dominant1$aff,
#'            model = diseaseModel("AD"))
#'
#' # All peaks above LOD = 1.5
#' lodPeaks(lods, threshold = 1.5)
#' peakSummary(lods, threshold = 1.5)
#'
#' @export
lodPeaks = function(x, threshold, width = 1) {

  peak1chr = function(xchr, threshold, width) { # xchr must be sorted!
    rl = rle(xchr$LOD >= threshold)
    while (1) {
      short = rl$values & (rl$lengths < width)
      if (any(short)) {
        rl$values[short] <- FALSE
        rl = rle(inverse.rle(rl))
      }
      else break
    }
    xchr_nrow = nrow(xchr)
    if (!any(rl$values))
      return(NULL)

    start_ind = c(0, cumsum(rl$lengths))[which(rl$values)]
    stop_ind = start_ind + rl$lengths[rl$values] + 1  # plus 1 to compensate for endpoint[1]

    lapply(1:length(start_ind), function(i) {
      strt = start_ind[i]
      stp = stop_ind[i]
      telomeric = c(if (strt == 0) "start", if (stp > xchr_nrow) "end")
      if (length(telomeric) == 0)
        telomeric = ""
      strt = max(strt, 1)
      stp = min(stp, xchr_nrow)
      structure(xchr[strt:stp, , drop = FALSE], rownames = NULL, telomeric = telomeric)
    })
  }

  df = x[!is.na(x$LOD), , drop = FALSE]
  chrs = unique.default(df$CHROM)
  res = list()
  for (chr in chrs) {
    dfchr = df[df$CHROM == chr, , drop = FALSE]
    res = c(res, peak1chr(dfchr, threshold = threshold, width = width))
  }
  res

}

#' @rdname lodPeaks
#' @importFrom utils read.table
#' @export
peakSummary = function(x, threshold, width = 1, physmap = NULL) {
  if (inherits(x, "linkres")) {
    if (is.null(threshold))
      stop2("argument `threshold` is missing, with no default")
    x = lodPeaks(x, threshold, width)
  }
  dat = lapply(x, function(df) {
    n = nrow(df)
    from_mb = df$MB[1]
    to_mb = df$MB[n]

    data.frame(
      CHROM = df$CHROM[1],
      FROM = df$MARKER[1],
      TO = df$MARKER[n],
      N = n,
      FROM_MB = from_mb,
      TO_MB = to_mb,
      LEN = to_mb - from_mb,
      TELO = attr(df, "telomeric"),
      MAXLOD = round(max(df$LOD),3),
      stringsAsFactors = FALSE
    )
  })

  a = do.call(rbind, dat)
  if (!is.null(physmap)) {
    if (is.matrix(physmap))
      physmap = as.data.frame(physmap)
    if (is.character(physmap))
      physmap = read.table(physmap, header = T, as.is = TRUE)
    if (is.data.frame(physmap)) {
      from_bp = physmap[match(a$FROM, physmap[, 1]), 3]
      to_bp = physmap[match(a$TO, physmap[, 1]), 3]
      a = cbind(a, FROM_BP = from_bp, TO_BP = to_bp)
      a = a[, c(1:5, 12:13, 6:11)]
    }
  }
  a
}

Try the paramlink2 package in your browser

Any scripts or data that you put into this service are public.

paramlink2 documentation built on Feb. 16, 2023, 6:05 p.m.