pez: Phylogenetics for the Environmental Sciences

#' Phylogenetically weighted regression: a method for modeling
#' non-stationarity on evolutionary trees
#'
#' \code{pwr.cv} performs model cross validation on the holdout set
#' \code{holdout}, K-fold cross validation can be specified using
#' \code{fold}. \code{pwr.treePlot} plots the phylogeny and allows for
#' plotting of PWR model coefficients at the tips of the tree. PWR
#' model coefficients to be pllotted must be appended to the phylo4d
#' object. \code{pwr.phylobubbles} plots either circles or squares
#' corresponding to the magnitude of each cell of a phylo4d object,
#' and allows the plotting of PWR moel coefficients.  Model
#' coefficients to be plotted must be appended to the phylo4d object
#' @param formula Model formula to be fit
#' @param x output from
#' @param phy4d \code{phylobase} format object to be used for fitting
#' @param bw Bandwidth for model fit
#' @param wfun Weighting function to be used; myust be one of
#'     \code{brownian}, \code{martins}, \code{gaussian}, or
#'     \code{Gaus}
#' @param verbose Whether to be verbose during model fitting (default
#'     \code{FALSE})
#' @param holdout vector of integers indicating position of tips on
#'     the tree to be held out during model fitting
#' @param coef coefficients to extract from the model; must be one of
#'     \code{formula}, \code{slope}, or \code{all}
#' @param model Model type to be fit; must be either \code{pwr}, or
#'     \code{pgls}
#' @param which.tips vector of integers indicating position of tips on
#'     the tree to be used, defaults to all species on the tree
#' @param object \code{pwr} object to have its coefficients extracted
#' @param method Numerical optimization algorithm to be used to
#'     estimate confidence intervals; should be one of subplex,
#'     optimize, or L-BFGS-B.
#' @param ... Optional argument passed on to internal \code{phylobase}
#'     plotting functions
#' @note Some information about this
#' @return Phylogenetically weighted model object; use \code{coef.pwr}
#'     to extract coefficients, and \code{pwr.tp} and
#'     \code{pwr.phylobubbles} for plotting.
#' @author Jim Regetz, T. Jonathan Davies, Elizabeth M. Wolkovich,
#'     Brian J. McGill, and a tiny bit of cosmetic tidying from Will
#'     Pearse. Plotting code (\code{pwr.tp} and
#'     \code{pwr.phylobubbles}) only slightly adapted from code in
#'     \code{phylobase}; thank you to the authors of \code{phylobase}!
#' 
#' @seealso caper:pgls phylobase:tp phylobase:phylobubbles
#' @references Davies, T. J., Regetz, J., Wolkovich, E. M., & McGill,
#'     B. J. (2019). Phylogenetically weighted regression: A method
#'     for modelling non-stationarity on evolutionary trees. Global
#'     Ecology and Biogeography, 28(2), 275-285.
#' @examples
#' # Below are some examples in order of increasing complexity: (1) a
#' # simple demonstration, and (2) some cross-validation examples.
#' 
#' ##########################################
#' # (1) A simple example ###################
#' ##########################################
#' # Make up some data
#' # - Note the deep bifurcation in the slope term
#' if(FALSE){
#' tree <- read.tree(text=
#'     "(((A:1,B:1):1,(C:1,D:1):1):1,((E:1,F:1):1,(G:1,H:1):1):1);")
#' dat <- data.frame(
#'    x = c(0, 1, 2, 3, 0, 1, 2, 3),
#'    y = rnorm(8, c(1, 2, 3, 4, 2, 1.5, 1, 0.5), sd=0.25)
#' )
#'
#' # Combine in phylobase format, and then run a scatterplot
#' c.data <- phylo4d(tree, dat)
#' plot(tipData(c.data)$x, tipData(c.data)$y, pch=rep(c(1,16), each=4),
#'     xlab="Predictor", ylab="Response", bty="l")
#' text(tipData(c.data)$x, tipData(c.data)$y, labels=rownames(tipData(c.data)),
#'     pos=3, cex=0.8)
#'
#' # Phylogenetically weighted-regression with various weighting functions
#' pwr.b <- coef(pwr(y ~ x, combined.data, wfun="brownian"))
#' pwr.m <- coef(pwr(y ~ x, combined.data, wfun="martins"))
#' pwr.g <- coef(pwr(y ~ x, combined.data, wfun="gaussian"))
#' pwr.G <- coef(pwr(y ~ x, combined.data, wfun="Gauss"))
#' 
#' # Run a PGLS
#' pgls <- gls(y ~ x, data=dat, correlation=corBrownian(phy=tree))
#'
#' # Contrast the PWR and PGLS estimates
#' pwr.treePlot(addData(c.data, data.frame(gest=coef(pgls)[2], glb=confint(pgls)[2,1],
#'    gub=confint(pgls)[2,2], pwr.m))[,c("est", "lb", "ub", "gest", "glb",
#'    "gub")], show.tip.label=TRUE, lower=-1., upper=1.5, pex=2, aex=3.5)
#' }
#' @importFrom ape read.tree 
#' @importFrom nlme gls corMatrix Initialize
#' @importFrom phylobase phylo4d nTips tipData edges nEdges hasEdgeLength edgeLength<- isRooted plotOneTree tipLabels phyloXXYY tdata
#' @importMethodsFrom phylobase phylo4d
#' @importFrom subplex subplex
#' @importFrom grid grid.newpage grid.newpage upViewport plotViewport pushViewport grid.draw grid.text gpar viewport unit grid.layout convertWidth stringWidth convertUnit grid.segments grid.points unit.c grob
#' @importFrom stats nlm confint fitted dnorm predict
#' @importFrom ape corBrownian corMartins
#' @importFrom utils txtProgressBar setTxtProgressBar
#' @rdname pwr
#' @name pwr
#' @aliases pwr coef.pwr plot.pwr
#######################################
# Fitting functions ###################
#######################################
#' @export
pwr <- function(formula, phy4d, which.tips, bw, wfun, verbose=FALSE) {
    if (missing(which.tips)) {
        which.tips <- seq.int(nTips(phy4d))
    }
    if (missing(bw)) {
        if (verbose) cat("Computing optimal bandwidth...")
        bw <- .get.opt.bw(formula, phy4d, wfun)
        if (verbose) cat("done.\n")
    }
    if (verbose) cat("Using bandwidth of", bw, "\n",
        "Generating weights matrix\n")
    wts <- .getWts(phy4d, bw, wfun)
    if (verbose) cat("Running PWR...\n")
    if (verbose) pb <- txtProgressBar(min=0, max=nTips(phy4d), style=3)
    ans <- lapply(which.tips, function(i) {
        if (verbose) setTxtProgressBar(pb, i)
        .pwr.wts(formula, phy4d, wts[,i])
    })
    if (verbose) close(pb)
    attr(ans, "weights") <- wts
    class(ans) <- "pwr"
    return(ans)
}
#' @export
#' @rdname pwr
#' @method coef pwr
coef.pwr <- function(object, ...) {
    if(!inherits(object, "phy.wt.reg"))
        stop("Error, ", object, " is not a phylogenetically weighted regression")
    est <- sapply(seq_along(object), function(i) {
        if (nrow(object[[i]])==2) {
            object[[i]][2,1]
        } else NA
    })
    lb <- sapply(seq_along(object), function(i) {
        if (nrow(object[[i]])==2) {
            object[[i]][2,1] - 1.96*object[[i]][2,2]
        } else NA
    })
    ub <- sapply(seq_along(object), function(i) {
        if (nrow(object[[i]])==2) {
            object[[i]][2,1] + 1.96*object[[i]][2,2]
        } else NA
    })
    data.frame(est, lb, ub)
}
#' @export
#' @rdname pwr
pwr.cv <-  function(formula, phy4d, holdout, coef=c("formula","slope","all"), model=c("pwr","pgls"), wfun, method) {
    coef <- match.arg(coef)
    model <- match.arg(model)

    if(model=="pwr"){
        # extract training set from phy4d
        phy.train <- phy4d[setdiff(seq(nTips(phy4d)), holdout)]
        if (missing(bwidth)) {
            bwidth <- .get.opt.bw(formula, phy.train, wfun=wfun, method=method)
        }
        # get weights vectors for *all* species, but only keep rows
        # corresponding to training set
        wts.train <- .getWts(phy4d, bwidth, wfun)[-holdout,]
        # extract training data from phy4d
        dat.train <- tipData(phy.train)
        # loop over each point and do a weighted least squares regression
        # with weights based on distance
        yhat <- sapply(holdout, function(p) {
            w <- wts.train[,p]
            b <- lm(formula, data=data.frame(dat.train, w), weights=w)
            predict(b, tipData(phy4d)[p,])
        })
        if(coef=="formula")
            return(data.frame(
                y=tipData(phy4d)[holdout, as.character(formula)[[2]]],
                yhat.pwr=yhat)
                )
        if(coef=="slope")
            return(data.frame(
                slope=tipData(phy4d)[holdout, "true_slope"],
                slope.pwr=yhat)
                )
        if(coef=="all")
            return(data.frame(
                slope=tipData(phy4d)[holdout,],
                slope.pwr=yhat)
                )
        } else  {
            # extract training set from phy4d
            p4.train <- phy4d[setdiff(seq(nTips(phy4d)), holdout)]
            phy.train <- suppressWarnings(as(p4.train, "phylo"))
            # extract training data from phy4d
            dat.train <- tipData(p4.train)
            # loop over each point and do a weighted least squares regression
            # with weights based on distance
            pgls <- do.call(gls, list(model=formula, data=dat.train,
                                      correlation=corBrownian(phy=phy.train)))
            if(coef=="all")
                return(data.frame(
                    y=tipData(phy4d)[holdout, ],
                    yhat.pgls=predict(pgls, tipData(phy4d)[holdout,]))
                    )
            if(coef=="formula")
                return(data.frame(
                    y=tipData(phy4d)[holdout, as.character(formula)[[2]]],
                    yhat.pgls=predict(pgls, tipData(phy4d)[holdout,]))
                    )
            if(coef=="slope")
                return(data.frame(
                    slope=tipData(phy4d)[holdout, "true_slope"],
                    slope.pgls=pgls$coefficients[[2]])
                    )
        }
}

#######################################
# Plotting functions ##################
#######################################
#' @export
#' @rdname pwr
#' @method plot pwr
plot.pwr <- function (x, ...) {
    if (!inherits(x, "phylo4"))
        stop("treePlot requires a phylo4 or phylo4d object")
    if (!isRooted(x))
        stop("treePlot function requires a rooted tree.")
    grid.newpage()
    Nedges <- nEdges(x)
    Ntips <- nTips(x)
    tip.order <- NULL
    if (!is.null(tip.order) && length(tip.order) > 1) {
        if (length(tip.order) != Ntips) {
            stop("tip.order must be the same length as nTips(x)")
        }
        if (is.numeric(tip.order)) {
            tip.order <- tip.order
        } else {
            if (is.character(tip.order)) {
                tip.order <- as.numeric(names(tipLabels(x))[
                    match(tip.order, tipLabels(x))])
            }
        }
        tip.order <- rev(tip.order)
    }
    if (!hasEdgeLength(x)) {
        edgeLength(x) <- rep(1, Nedges)
    }
    pushViewport(plotViewport(margins=c(1.1, 1.1, 1.1, 1.1)))
    .pwr.phylobubbles(type="phylogram", show.node.label=FALSE,
        rot=0, edge.color="black", node.color="black",
        tip.color="black", edge.width=1,
        show.tip.label=TRUE, newpage=TRUE, XXYY=phyloXXYY(x, tip.order), ...)
    upViewport()
}
.pwr.phylobubbles <- function (type=type, place.tip.label="right",
    show.node.label=show.node.label, show.tip.label=show.tip.label,
    edge.color=edge.color, node.color=node.color, tip.color=tip.color,
    edge.width=edge.width, newpage=TRUE, cex=1, pex=1, aex=1, ..., XXYY,
    square=FALSE, show.estimates=TRUE, lower=NULL, upper=NULL) {

    nVars <- 1
    lab.right <- ifelse(place.tip.label %in% c("right", "both"),
        TRUE, FALSE) && show.tip.label
    lab.left <- ifelse(place.tip.label %in% c("left", "both"),
        TRUE, FALSE) && show.tip.label
    phy <- XXYY$phy
    tmin <- min(tdata(phy, type="tip"), na.rm=TRUE)
    tmax <- max(tdata(phy, type="tip"), na.rm=TRUE)
    pedges <- edges(phy)
    tip.order <- XXYY$torder
    tipdata <- tdata(phy, type="tip")[tip.order, , drop=FALSE]
    dlabwdth <- max(stringWidth(colnames(tipdata))) * 1.2
    if (convertWidth(dlabwdth, "cm", valueOnly=TRUE) < 2) {
        dlabwdth <- unit(2, "cm")
    }
    phyplotlayout <- grid.layout(nrow=2, ncol=2, heights=unit.c(unit(1,
        "null"), dlabwdth), widths=unit(c(1, 1), c("null",
        "null"), list(NULL, NULL)))
    pushViewport(viewport(layout=phyplotlayout, name="phyplotlayout"))
    pushViewport(viewport(layout.pos.row=1:2, layout.pos.col=2,
        height=unit(1, "npc") + convertUnit(dlabwdth, "npc"),
        name="bubbleplots", default.units="native"))
    tys <- XXYY$yy[pedges[, 2] <= nTips(phy)]
    tys <- tys[match(names(tipLabels(phy))[tip.order], XXYY$torder)]
    maxr <- ifelse(ncol(tipdata) > nTips(phy), 1/ncol(tipdata),
        1/nTips(phy))
    tipdataS <- apply(tipdata, 2, function(x) (maxr * x)/max(abs(x),
        na.rm=TRUE))
    if (is.null(lower)) {
        lower <- min(tipdata)
    }
    if (is.null(upper)) {
        upper <- max(tipdata)
    }
    tipdataS2 <- .rescale(tipdata, lower, upper)
    if (nVars == 1) {
        xpos <- 0.5
    } else {
        xpos <- seq(0 + maxr + 0.12, 1 - maxr - 0.12, length.out=nVars)
    }
    xrep <- rep(xpos, each=length(tys))
    yrep <- rep(tys, nVars)
    ccol <- ifelse(tipdata < 0, "black", "white")
    naxs <- matrix(xrep, ncol=nVars)
    nays <- matrix(yrep, ncol=nVars)
    dnas <- is.na(tipdataS)
    naxs <- naxs[dnas]
    nays <- nays[dnas]
    tipdataS[is.na(tipdataS)] <- 0 + 0.001
    if (lab.right) {
        tiplabwidth <- max(stringWidth(tipLabels(phy)))
    } else {
        tiplabwidth <- unit(0, "null", NULL)
    }
    bublayout <- grid.layout(nrow=2, ncol=2, widths=unit.c(unit(1,
        "null", NULL), tiplabwidth), heights=unit.c(unit(1,
        "null", NULL), dlabwdth))
    pushViewport(viewport(x=0.5, y=0.5, width=0.95, height=1,
        layout=bublayout, name="bublayout"))
    pushViewport(viewport(name="bubble_plots", layout=bublayout,
        layout.pos.col=1, layout.pos.row=1))

    # plot x-axis
    labs <- pretty(c(lower, upper))
    vals <- .rescale(labs, lower, upper)
    labs <- format(labs[0 <= vals & vals <= 1], nsmall=1)
    vals <- vals[0 <= vals & vals <= 1]
    ex <- -0.02 * aex
    grid.segments(x0=min(vals), x1=max(vals), y0=ex, y1=ex)
    grid.segments(x0=vals, x1=vals, y0=ex+0.01, y1=ex,
        gp=gpar(col="black"))
    grid.text(labs, x=vals, y=ex-0.01*aex, gp=gpar(cex=0.5*cex))
    grid.text("Coefficient", x=0.5, y=ex-0.02*aex, gp =
        gpar(cex=0.5*cex))

    # plot interesting results
    grid.segments(x0=tipdataS2$gest, x1=tipdataS2$gest, y0=0,
        y1=1, gp=gpar(col="grey"))
    grid.segments(x0=tipdataS2$glb, x1=tipdataS2$glb, y0=0, y1=1,
        gp=gpar(col="grey", lty="dashed"))
    grid.segments(x0=tipdataS2$gub, x1=tipdataS2$gub, y0=0, y1=1,
        gp=gpar(col="grey", lty="dashed"))
    grid.segments(x0=tipdataS2$lb, x1=tipdataS2$ub, y0=tys, y1=tys,
        gp=gpar(col="red"))
    if (!is.null(tipdataS2$lb.1) & !is.null(tipdataS2$ub.1)) {
        grid.segments(x0=tipdataS2$lb.1, x1=tipdataS2$ub.1,
            y0=tys+0.3*diff(tys[1:2]), y1=tys+0.3*diff(tys[1:2]),
            gp=gpar(col="grey"))
    }
    if (!is.null(tipdataS2$lb.2) & !is.null(tipdataS2$ub.2)) {
        grid.segments(x0=tipdataS2$lb.2, x1=tipdataS2$ub.2,
            y0=tys+0.6*diff(tys[1:2]), y1=tys+0.6*diff(tys[1:2]),
            gp=gpar(col="green"))
    }
    if (!is.null(tipdataS2$simcoef)) {
        grid.points(tipdataS2$simcoef, yrep, pch=1, size =
            unit(0.03*pex, "npc"))

    }
    if (show.estimates) {
        grid.points(tipdataS2$est, yrep, pch=16, size=unit(0.02*pex,
            "npc"), gp=gpar(col="red"))

    }

    if (length(naxs) > 0) {
        grid.points(naxs, nays, pch=4)
    }
    upViewport()
    if (lab.right) {
        pushViewport(viewport(name="bubble_tip_labels", layout=bublayout,
            layout.pos.col=2, layout.pos.row=1))
        tt <- sub("_", " ", tipLabels(phy)[tip.order])
        grid.text(tt, 0.1, tys, just="left", gp=gpar(cex=0.5*cex))
        upViewport()
    }
    pushViewport(viewport(name="bubble_data_labels", layout=bublayout,
        layout.pos.col=1, layout.pos.row=2))
    datalaboffset <- convertUnit(unit(15, "mm"), "npc", valueOnly=TRUE)
    upViewport(3)
    pushViewport(viewport(layout.pos.row=2, layout.pos.col=1,
        name="bubblelegend"))
    yyy <- grob(tipdata, tipdataS, cl = "bubLegend")
    grid.draw(yyy)
    upViewport()
    pushViewport(viewport(layout.pos.row=1, layout.pos.col=1,
        name="tree"))
    plotOneTree(XXYY, "phylogram", show.tip.label=FALSE, show.node.label,
        edge.color, node.color, tip.color, edge.width, rot=0)
    upViewport(2)

}

#######################################
# Internal functions ##################
#######################################
# Run PWR given weight function and bandwidth
.pwr.wfn <- function(formula, phy4d, wfun, bwidth, holdout=FALSE) {
    wts <- .getWts(phy4d, bwidth, wfun)
    bs <- list()
    res <- c()
    yhat <- c()
    # loop over each point and do a weighted least squares regression
    # with weights based on distance
    for (p in 1:nTips(phy4d)) {
        w <- wts[,p]
        if (holdout) {
            w[p] <- 0
        }
        b <- lm(formula, data=data.frame(tipData(phy4d), w), weights=w)
        bs[[p]] <- coef(b)
        res[p] <- resid(b)[p]
        yhat[p] <- fitted(b)[p]
    }
    coef <- do.call("rbind", bs)
    return(list(coef=coef, resid=res, fitted=yhat))
}
# Run PWR given explicit weight values
.pwr.wts <- function(formula, phy4d, wts, verbose=FALSE, plot=FALSE) {
    tree <- suppressWarnings(as(phy4d, "phylo"))
    dat <- data.frame(tipData(phy4d), wts=wts)
    # do straight pwr
    pwr <- lm(formula, data=dat, weights=wts)
    if (verbose) {
        cat("\nPWR confidence intervals:\n")
        print(confint(pwr))
    }
    return(coef(summary(pwr)))
}
# optimal bandwidth finder
.get.opt.bw <- function(formula, phy4d, wfun, interval, method="subplex",
    verbose=FALSE) {

    if (wfun == "brownian") {
        if (verbose) {
            message("no bandwidth for brownian distance; returning NULL")
        }
        return(NULL)
    }
    # set bounds on possible bandwidth values - these work fine for MS
    # purposes but may need to be revisited for generality
    if (missing(interval)) {
        dist <- cophenetic(suppressWarnings(as(phy4d, "phylo")))
        if (wfun %in% c("gaussian", "Gauss")) {
            lo <- 0.01
            hi <- max(dist)/1.5
        } else if (wfun == "exponential") {
            lo <- 0.01
            hi <- 20
        } else if (wfun == "martins") {
            lo <- 0
            hi <- -log(1e-6)/min(dist[lower.tri(dist)])
        }
        interval <- c(lo, hi)
    }
    if (verbose) message("-- Running ", method, " --")
    if (method=="subplex") {
        optfn.subplex <- function(logbwidth) {
            sum(.pwr.wfn(formula, phy4d, wfun, exp(logbwidth), TRUE)$resid^2)
        }
        runtime <- system.time(res <- subplex(-1, optfn.subplex))
        if (res$convergence!=0) {
           warning(paste("bandwidth optimization problem (code ",
               res$convergence, ")", sep=""))
        }
        opt.bw <- exp(res$par)
    } else if (method=="optimize") {
        optfn.optimize <- function(bwidth) {
            sum(.pwr.wfn(formula, phy4d, wfun, bwidth, TRUE)$resid^2)
        }
        runtime <- system.time(res <- optimize(optfn.optimize, interval=interval))
        opt.bw <- res$minimum
    } else if (method=="L-BFGS-B") {
        optfn.lbgfsb <- function(bwidth) {
            sum(.pwr.wfn(formula, phy4d, wfun, bwidth, TRUE)$resid^2)
            #vals <- .pwr.wfn(formula, phy4d, wfun, bwidth, TRUE)
            #if (any(is.na(vals$coef[, "x"]))) Inf else sum(vals$resid^2)
        }
        runtime <- system.time(res <- optim(1, optfn.lbgfsb, lower=0,
            method="L-BFGS-B"))
        opt.bw <- res$par
    } else if (method=="nlm") {
        optfn.nlm <- function(bwidth) {
            sum(.pwr.wfn(formula, phy4d, wfun, bwidth, TRUE)$resid^2)
        }
        runtime <- system.time(res <- nlm(optfn.nlm, 0))
        opt.bw <- res$minimum
    } else {
        stop("invalid optimization algorithm")
    }
    if (verbose) {
        print(runtime)
        message("bandwidth = ", opt.bw)
    }
    opt.bw
}
# create full matrix of weights
.getWts <- function(phy4d, bw, wfun) {
    data <- tipData(phy4d)
    tree <- suppressWarnings(as(phy4d, "phylo"))
    dist <- cophenetic(tree)
    switch(wfun,
        gaussian = {
            sqd <- sqrt(dist)
            dnorm(t(t(sqd) / apply(sqd, 2, sd)) / bw)
        },
        exponential = {
            exp(-dist/bw)
        },
        Gauss = {
            exp((-0.5) * ((dist^2)/(bw^2)))
        },
        brownian = {
            corMatrix(Initialize(corBrownian(phy=tree), data))
        },
        martins = {
            corMatrix(Initialize(corMartins(bw, phy=tree), data))
        },
        stop("invalid distance weighting scheme")
    )
}
.rescale <- function(x, lower=NULL, upper=NULL, na.rm=TRUE) {
    if (is.null(lower)) {
        lower <- min(x, na.rm=na.rm)
    }
    if (is.null(upper)) {
        upper <- max(x, na.rm=na.rm)
    }
    (x - lower) / (upper - lower)
}