R/util.R

Defines functions split.phylo check.integer classify.by.splits data.frame.to.vectors ks.test.quick

Documented in data.frame.to.vectors

## diversitree imports of internal functions:

## Makes a colour vector semitransparent
add.alpha <- diversitree:::add.alpha

## These were useful for checking problems, but are currently not used.
## ancestors <- diversitree:::ancestors
## mrca.tipset <- diversitree:::mrca.tipset

## Split a tree 'x' into a focal clade subtended by node 'f', and a
## basal group (everything except this clade).  This returns a list
## where the first element is the basal group and the second group is
## the target clade.
split.phylo <- function(x, f, drop=FALSE, ...) {
  phy.tip <- extract.clade(x, f)
  phy.base <- diversitree:::drop.tip.fixed(x, phy.tip$tip.label)
  list(phy.base, phy.tip)
}

## Check to see if a vector is integer (or close enough to integer).
## If error is FALSE, this will return NULL
check.integer <- function(x, error=TRUE) {
  nna <- !is.na(x)
  if (max(abs(x[nna] - round(x[nna]))) > 1e-08)
    if ( error )
      stop("Non-integer argument for ", deparse(substitute(x)))
    else
      return(NULL)
  storage.mode(x) <- "integer"
  x
}

## Classify the edges of a tree 'phy' into different groups based on
## stepwise splitting of the tree at a series of nodes.  This is
## similar (identical?) to the algorithm that MEDUSA uses, and is
## equivalent to the series of trees generated by ksi()

## If 'group' is provided, we will add *new* groups to this.  No check
## is made that this vector is sensible, and 'base' must be provided.
## Basically internal use only.
classify.by.splits <- function(phy, nodes, group=NULL, base=NULL) {
  nodes <- match(nodes, c(phy$tip.label, phy$node.label))
  edge <- phy$edge
  root <- length(phy$tip.label) + 1

  storage.mode(nodes) <- "integer"
  storage.mode(edge) <- "integer"
  n.tip <- as.integer(length(phy$tip.label))

  if ( is.null(group) ) {
    group <- rep(1, nrow(edge))
    base <- integer(length(nodes))
  } else {
    if ( max(group)-1 > length(base) )
      stop("This just isn't going to work out")
    base <- c(base, integer(length(nodes)))
  }

  for ( node in nodes ) {
    if (is.na(node)) 
      next
    
    depth <- max(group) + 1
    i <- diversitree:::descendants.idx.C(node, edge, n.tip)
    base[depth-1] <- if ( node == root ) 1 else group[edge[, 2] == node]
    group[i[group[i] == base[depth-1]]] <- depth
  }

  list(group=group, base=base)
}

## Convert a data frame to a series of vectors named with the
## data.frame's rownames, omitting all NA values.
data.frame.to.vectors <- function(dat) {
  f <- function(x) {
    names(x) <- rownames(dat)
    x[!is.na(x)]
  }
  lapply(dat, f)
}


  
## Ten nice colours.
cols10 <- c("#eaab00", "#e37222", "#803d0d", "#cc0033", "#4f2d7f",
            "#56364d", "#473b63", "#162274", "#004165", "#618e02",
            "#5c705e")[-7][c(1, 7, 3, 4, 10, 6, 2, 8, 9, 5)]


## This is a brutally trimmed down version of ks.test() that just
## computes the D statistic.  This should be identical to the
## statistic produced by ks.test(), but about 4x faster.
ks.test.quick <- function(x, y) {
  names(x) <- names(y) <- NULL
  x <- x[!is.na(x)]
  y <- y[!is.na(y)]
  n.x <- as.double(length(x)) # } recast to avoid integer overflow
  n.y <- as.double(length(y)) # }
  if ( n.x * n.y < 1L)
    stop("not enough data")

  w <- c(x, y)
  n <- n.x + n.y
  
  ord <- sort.int(w, method="quick", index.return=TRUE)$ix
  z <- rep.int(-1/n.y, n)
  z[ord <= n.x]  <- 1/n.x
  z <- cumsum(z)

  wi <- w[ord]
  if ( any(same <- wi[-1] == wi[-n]) )
    z <- z[c(!same, TRUE)]

  max(abs(z))
}
richfitz/ksi documentation built on May 27, 2019, 8:19 a.m.