R/fusco.R

Defines functions plot.fusco summary.fusco print.fusco fusco.test

Documented in fusco.test plot.fusco print.fusco summary.fusco

## CAIC to caper
## - finally accepted that the Markov simulation code to test CI's on uncorrected I was
##   only being kept in for stubborness and isn't needed for benchmarking
## - this collapse much of the code back into a single function.

fusco.test <- function(phy, data , names.col , rich , tipsAsSpecies=FALSE,
                       randomise.Iprime=TRUE,  reps=1000, conf.int = 0.95){
    
	# want to be able to just run it on a phylogeny for the topology
	if(inherits(phy, 'phylo') & missing(data)){
		tipsAsSpecies <- TRUE
		data <- data.frame(nSpp = rep(1, length(phy$tip.label)), tips=phy$tip.label)
		phy <- comparative.data(phy = phy, data = data, names.col = 'tips')
	} else if(inherits(phy, "phylo" ) & ! missing(data)){
		if(missing(names.col)) stop('Names column not specified')
		names.col <- deparse(substitute(names.col))
		# old style use with data and phylogeny separated
		# let comparative.data() match and handle class checking
		phy <- eval(substitute(comparative.data(phy = phy, data = data, names.col = XXX), list(XXX=names.col)))
    } else if(! inherits(phy, 'comparative.data')) {
		stop('phy must be a phylo or comparative.data object.')
    }
    
	# check for the richness column
	if( ! tipsAsSpecies){
		if(missing(rich)) {
			stop('The name of a column of richness values must be provided')
		} else {
			rich <- deparse(substitute(rich))
	    	if(! rich %in% names(phy$data)) stop("The column '", rich, "' was not found in the data from '", phy$data.name,"'.")
	    }
 	}

	# get into pruningwise order
	# method dispatch works on either 'phylo' or 'comparative.data'
	phy <- reorder(phy, "pruningwise")
    if(tipsAsSpecies){
		rich <- rep(1, length(phy$phy$tip.label))
	} else {
		rich <- phy$data[,rich,drop=TRUE] # vector of values 	
	}
	
	nSpecies <- sum(rich)
	nTips <- length(rich)
	
	# calculate fusco 
    intNodes <- unique(phy$phy$edge[,1])
    nTip <- length(phy$phy$tip.label)
    nNode <- phy$phy$Nnode
    rich <- c(rich, rep(NA, nNode))
    
    # Data store
    observed <- data.frame(polytomy=logical(nNode), N1 = numeric(nNode),
                      N2 = numeric(nNode), row.names=intNodes)
    
    # loop tips
    for(ind in seq(along=intNodes)){
        
        # grab the daughters of the node
        daughters <- phy$phy$edge[,2][phy$phy$edge[,1] == intNodes[ind]]
        richD <- rich[daughters]
        
        if(length(daughters) > 2){
            observed$polytomy[ind] <- TRUE  
        } else {
            observed[ind, 2:3] <- richD
        }
        
        rich[intNodes[ind]] <- sum(richD)
    }
    
    observed$S <- with(observed, N1 + N2)
    observed <- observed[! observed$polytomy & observed$S > 3, names(observed) != 'polytomy']
    
    observed$B <- with(observed, pmax(N1, N2))
    observed$M <- observed$S - 1
    observed$m <- ceiling(observed$S/2)
    
    observed$I <- with(observed, (B - m)/(M - m))
    observed$S.odd <- (observed$S %% 2) == 1
    observed$w <- with(observed, ifelse(S.odd, 1, ifelse(I > 0, M/S, 2*M/S))) 
    observed$I.w <- with(observed, (I*w)/mean(w))
    observed$I.prime <- with(observed, ifelse(S.odd, I, I * M/S))
    
    # add node labels if present [Arne Mooers]
    if(! is.null(phy$phy$node.label)){
        observed$node <- phy$phy$node.label[as.numeric(rownames(observed)) - nTip]
    }
    
    obsStats <- with(observed, c(median(I), IQR(I)/2))
    
    ret <- list(observed=observed, median.I=median(observed$I), mean.Iprime=mean(observed$I.prime),
                qd=IQR(observed$I)/2, tipsAsSpecies=tipsAsSpecies,
                 nInformative=dim(observed)[1], nSpecies=nSpecies, nTips=nTips)
                
    class(ret) <- "fusco"
    
    if(randomise.Iprime){
	
	   expFun <- function(x){
	    	y <- runif(length(x))
	    	rand.I.prime <- ifelse(y>0.5,x,1-x)
	    	ret <- mean(rand.I.prime)
	    	return(ret)
	    }

	    randomised <- with(ret, replicate(reps, expFun(observed$I.prime)))
	    randomised <- as.data.frame(randomised)
	    names(randomised) <- "mean"
	    rand.twotail <- c((1 - conf.int)/2, 1 - (1 - conf.int)/2)
	    rand.mean <- quantile(randomised$mean, rand.twotail)

	    ret <- c(ret, list(randomised=randomised, rand.mean=rand.mean, reps=reps, conf.int=conf.int))
    }

    class(ret) <- "fusco"
    return(ret)
}

## ## testing method combinations
## birdFuscoTest <- fusco.test(fuscoBirdTree)  # tree only explicitly
## birdFuscoTest <- fusco.test(fuscoBirdTree, tipsAsSpecies=TRUE) # tree only explicitly
## birdFuscoTest <- fusco.test(fuscoBirdTree, fuscoBirdData, tipLab, nSpp) # old style
## fusco <- comparative.data(fuscoBirdTree, fuscoBirdData, tipLab)
## birdFuscoTest <- fusco.test(fusco, rich=nSpp) # new style
## birdFuscoTest <- fusco.test(fusco, tipsAsSpecies=TRUE)

print.fusco <- function(x, ...){
    print(x$observed)
}

summary.fusco <- function(object, ...){
    cat("Fusco test for phylogenetic imbalance\n\n")
    
    cat("  Tree with", object$nInformative, "informative nodes and", object$nTips, "tips.\n")
    if(object$tipsAsSpecies){
        cat("  Tips are treated as species.\n\n")
    } else {
        cat("  Tips are higher taxa containing", object$nSpecies, "species.\n")
    }
    
    if(! is.null(object$randomised) | ! is.null(object$simulated)){
        cat(" ", sprintf("%2.1f%%", object$conf.int*100), "confidence intervals around 0.5 randomised using", object$reps, "replicates.\n" )
    }
    cat("\n")

    cat("  Mean I prime:", round(object$mean.Iprime,3))
    if(! is.null(object$randomised)){
        cat(sprintf(" [%1.3f,%1.3f]",  object$rand.mean[1],object$rand.mean[2]))
    }
    cat("\n")
    
    cat("  Median I:", round(object$median.I,3))
    if(! is.null(object$simulated)){
        cat(sprintf(" [%1.3f,%1.3f]",  object$sim.median[1],object$sim.median[2]))
    }
    cat("\n")
    cat("  Quartile deviation in I:", round(object$qd,3))
    if(! is.null(object$simulated)){
        cat(sprintf(" [%1.3f,%1.3f]",  object$sim.qd[1],object$sim.qd[2]))
    }
    cat("\n")
    
    print(wilcox.test(object$observed$I.prime, mu=0.5))
    
}

plot.fusco <- function(x, correction=TRUE, nBins=10, right=FALSE, I.prime=TRUE, plot=TRUE, ...){
    
    
    breaks <- seq(0,1,length=nBins+1)
    if(I.prime){
        fuscoDist <- hist(x$observed$I.prime, breaks=breaks, plot=FALSE, right=right)
        xLab <- "Nodal imbalance score (I')"
    } else {
        fuscoDist <- hist(x$observed$I, breaks=breaks, plot=FALSE, right=right)        
        xLab <- "Nodal imbalance score (I)"
    }
    
    interv <- levels(cut(0.5, breaks=breaks, right=right)) # hijack the interval notation code
    RET <- data.frame(imbalance=interv, observedFrequency=fuscoDist$density/nBins)
    
    if(correction==TRUE){
        # find out the distribution of possible values for 
        # each node given the number of classes
        allPossI <- function(S, I.prime){
            m <- ceiling(S/2)
            RET <- (seq(from=m, to=S-1) - m)/((S - 1) - m)
            if(I.prime & (S%%2) == 1) RET <- RET * (S-1) / S
            return(RET)
        }
        
        distrib <- sapply(x$observed$S, FUN=allPossI, I.prime=I.prime)
        distrib <- sapply(distrib, function(x) hist(x, breaks=breaks, plot=FALSE, right=right)$density)
        distrib <- distrib/nBins
        correction <- 1/nBins - distrib
        correction <- rowMeans(correction)
        
        # distrib <- sapply(x$observed$S, FUN=allPossI)
        # distrib <- hist(unlist(distrib), breaks=breaks, plot=FALSE)
        # correction <- 1/nBins - distrib$density/nBins
        
        RET$correction <- correction
        RET$correctedFrequency <- with(RET, observedFrequency + correction)
        
        # hijack the histogram plotting
        if(plot){
        plot(structure(list(density=RET$correctedFrequency, breaks=breaks), 
             class="histogram"), freq=FALSE, ylab="Corrected Frequency", main="",
             xlab=xLab)}
    } else {
        # hijack the histogram plotting
        if(plot){
        plot(structure(list(density=RET$observedFrequency, breaks=breaks), 
             class="histogram"), freq=FALSE, ylab="Observed Frequency", main="",
             xlab=xLab)}
    }
    
    if(plot){
    if(I.prime){
        abline(v=x$mean.Iprime)
        if(! is.null(x$rand.mean)) abline(v=x$rand.mean, col="red")        
    } else {
        abline(v=median(x$observed$I))
        if(! is.null(x$sim.median)) abline(v=x$sim.median, col="red")
    }}

    invisible(RET)
    
}

Try the caper package in your browser

Any scripts or data that you put into this service are public.

caper documentation built on Sept. 26, 2023, 5:10 p.m.