Nothing
#######
## dapc
########
dapc <- function (x, ...) UseMethod("dapc")
###################
## dapc.data.frame
###################
#' @method dapc data.frame
#' @export
dapc.data.frame <- function(x, grp, n.pca=NULL, n.da=NULL,
center=TRUE, scale=FALSE, var.contrib=TRUE, var.loadings=FALSE,
pca.info=TRUE, pca.select=c("nbEig","percVar"), perc.pca=NULL, ..., dudi=NULL){
## FIRST CHECKS
grp <- as.factor(grp)
if(length(grp) != nrow(x)) stop("Inconsistent length for grp")
pca.select <- match.arg(pca.select)
if(!is.null(perc.pca) & is.null(n.pca)) pca.select <- "percVar"
if(is.null(perc.pca) & !is.null(n.pca)) pca.select <- "nbEig"
if(!is.null(dudi) && !inherits(dudi, "dudi")) stop("dudi provided, but not of class 'dudi'")
## SOME GENERAL VARIABLES
N <- nrow(x)
REDUCEDIM <- is.null(dudi)
if(REDUCEDIM){ # if no dudi provided
## PERFORM PCA ##
maxRank <- min(dim(x))
pcaX <- dudi.pca(x, center = center, scale = scale, scannf = FALSE, nf=maxRank)
} else { # else use the provided dudi
pcaX <- dudi
}
cumVar <- 100 * cumsum(pcaX$eig)/sum(pcaX$eig)
if(!REDUCEDIM){
myCol <- rep(c("black", "lightgrey"), c(ncol(pcaX$li),length(pcaX$eig)))
} else {
myCol <- "black"
}
## select the number of retained PC for PCA
if(is.null(n.pca) & pca.select=="nbEig"){
plot(cumVar, xlab="Number of retained PCs", ylab="Cumulative variance (%)", main="Variance explained by PCA", col=myCol)
cat("Choose the number PCs to retain (>=1): ")
n.pca <- as.integer(readLines(con = getOption('adegenet.testcon'), n = 1))
}
if(is.null(perc.pca) & pca.select=="percVar"){
plot(cumVar, xlab="Number of retained PCs", ylab="Cumulative variance (%)", main="Variance explained by PCA", col=myCol)
cat("Choose the percentage of variance to retain (0-100): ")
nperc.pca <- as.numeric(readLines(con = getOption('adegenet.testcon'), n = 1))
}
## get n.pca from the % of variance to conserve
if(!is.null(perc.pca)){
n.pca <- min(which(cumVar >= perc.pca))
if(perc.pca > 99.999) n.pca <- length(pcaX$eig)
if(n.pca<1) n.pca <- 1
}
## keep relevant PCs - stored in XU
X.rank <- sum(pcaX$eig > 1e-14)
n.pca <- min(X.rank, n.pca)
if(n.pca >= N) n.pca <- N-1
n.pca <- round(n.pca)
U <- pcaX$c1[, 1:n.pca, drop=FALSE] # principal axes
rownames(U) <- colnames(x) # force to restore names
XU <- pcaX$li[, 1:n.pca, drop=FALSE] # principal components
XU.lambda <- sum(pcaX$eig[1:n.pca])/sum(pcaX$eig) # sum of retained eigenvalues
names(U) <- paste("PCA-pa", 1:ncol(U), sep=".")
names(XU) <- paste("PCA-pc", 1:ncol(XU), sep=".")
## PERFORM DA ##
ldaX <- lda(XU, grp, tol=1e-30) # tol=1e-30 is a kludge, but a safe (?) one to avoid fancy rescaling by lda.default
lda.dim <- sum(ldaX$svd^2 > 1e-10)
ldaX$svd <- ldaX$svd[1:lda.dim]
ldaX$scaling <- ldaX$scaling[,1:lda.dim,drop=FALSE]
if(is.null(n.da)){
barplot(ldaX$svd^2, xlab="Linear Discriminants", ylab="F-statistic", main="Discriminant analysis eigenvalues", col=heat.colors(length(levels(grp))) )
cat("Choose the number discriminant functions to retain (>=1): ")
n.da <- as.integer(readLines(con = getOption('adegenet.testcon'), n = 1))
}
##n.da <- min(n.da, length(levels(grp))-1, n.pca) # can't be more than K-1 disc. func., or more than n.pca
n.da <- round(min(n.da, lda.dim)) # can't be more than K-1 disc. func., or more than n.pca
predX <- predict(ldaX, dimen=n.da)
## BUILD RESULT
res <- list()
res$n.pca <- n.pca
res$n.da <- n.da
res$tab <- XU
res$grp <- grp
res$var <- XU.lambda
res$eig <- ldaX$svd^2
res$loadings <- ldaX$scaling[, 1:n.da, drop=FALSE]
res$means <- ldaX$means
res$ind.coord <-predX$x
res$grp.coord <- apply(res$ind.coord, 2, tapply, grp, mean)
res$prior <- ldaX$prior
res$posterior <- predX$posterior
res$assign <- predX$class
res$call <- match.call()
## optional: store loadings of variables
if(pca.info){
res$pca.loadings <- as.matrix(U)
res$pca.cent <- pcaX$cent
res$pca.norm <- pcaX$norm
res$pca.eig <- pcaX$eig
}
## optional: get loadings of variables
if(var.contrib || var.loadings){
var.load <- as.matrix(U) %*% as.matrix(ldaX$scaling[,1:n.da,drop=FALSE])
if(var.contrib){
f1 <- function(x){
temp <- sum(x*x)
if(temp < 1e-12) return(rep(0, length(x)))
return(x*x / temp)
}
res$var.contr <- apply(var.load, 2, f1)
}
if(var.loadings) res$var.load <- var.load
}
class(res) <- "dapc"
return(res)
} # end dapc.data.frame
#############
## dapc.matrix
#############
#' @method dapc matrix
#' @export
dapc.matrix <- function(x, ...){
return(dapc(as.data.frame(x), ...))
}
#############
## dapc.genind
#############
#' @method dapc genind
#' @export
dapc.genind <- function(x, pop=NULL, n.pca=NULL, n.da=NULL,
scale=FALSE, truenames=TRUE, var.contrib=TRUE, var.loadings=FALSE,
pca.info=TRUE, pca.select=c("nbEig","percVar"), perc.pca=NULL, ...){
## FIRST CHECKS
if(!is.genind(x)) stop("x must be a genind object.")
if(is.null(pop)) {
pop.fac <- pop(x)
} else {
pop.fac <- pop
}
if(is.null(pop.fac)) stop("x does not include pre-defined populations, and `pop' is not provided")
## SOME GENERAL VARIABLES
N <- nInd(x)
## PERFORM PCA ##
maxRank <- min(tab(x))
X <- scaleGen(x, center = TRUE, scale = scale,
NA.method = "mean")
## CALL DATA.FRAME METHOD ##
res <- dapc(X, grp=pop.fac, n.pca=n.pca, n.da=n.da,
center=FALSE, scale=FALSE, var.contrib=var.contrib,
var.loadings=var.loadings, pca.select=pca.select, perc.pca=perc.pca)
res$call <- match.call()
## restore centring/scaling
res$pca.cent <- attr(X, "scaled:center")
if(scale) {
res$pca.norm <- attr(X, "scaled:scale")
}
return(res)
} # end dapc.genind
######################
## Function dapc.dudi
######################
#' @method dapc dudi
#' @export
dapc.dudi <- function(x, grp, ...){
return(dapc.data.frame(x$li, grp, dudi=x, ...))
}
#################
## dapc.genlight
#################
#' @method dapc genlight
#' @export
dapc.genlight <- function(x, pop=NULL, n.pca=NULL, n.da=NULL,
scale=FALSE, var.contrib=TRUE, var.loadings=FALSE, pca.info=TRUE,
pca.select=c("nbEig","percVar"), perc.pca=NULL, glPca=NULL, ...){
## FIRST CHECKS ##
if(!inherits(x, "genlight")) stop("x must be a genlight object.")
pca.select <- match.arg(pca.select)
if(is.null(pop)) {
pop.fac <- pop(x)
} else {
pop.fac <- pop
}
if(is.null(pop.fac)) stop("x does not include pre-defined populations, and `pop' is not provided")
## PERFORM PCA ##
REDUCEDIM <- is.null(glPca)
if(REDUCEDIM){ # if no glPca provided
maxRank <- min(c(nInd(x), nLoc(x)))
pcaX <- glPca(x, center = TRUE, scale = scale, nf=maxRank, loadings=FALSE, returnDotProd = TRUE, ...)
}
if(!REDUCEDIM){ # else use the provided glPca object
if(is.null(glPca$loadings) & var.contrib) {
warning("Contribution of variables requested but glPca object provided without loadings.")
var.contrib <- FALSE
}
pcaX <- glPca
}
if(is.null(n.pca)){
cumVar <- 100 * cumsum(pcaX$eig)/sum(pcaX$eig)
}
## select the number of retained PC for PCA
if(!REDUCEDIM){
myCol <- rep(c("black", "lightgrey"), c(ncol(pcaX$scores),length(pcaX$eig)))
} else {
myCol <- "black"
}
if(is.null(n.pca) & pca.select=="nbEig"){
plot(cumVar, xlab="Number of retained PCs", ylab="Cumulative variance (%)", main="Variance explained by PCA", col=myCol)
cat("Choose the number PCs to retain (>=1): ")
n.pca <- as.integer(readLines(con = getOption('adegenet.testcon'), n = 1))
}
if(is.null(perc.pca) & pca.select=="percVar"){
plot(cumVar, xlab="Number of retained PCs", ylab="Cumulative variance (%)", main="Variance explained by PCA", col=myCol)
cat("Choose the percentage of variance to retain (0-100): ")
nperc.pca <- as.numeric(readLines(con = getOption('adegenet.testcon'), n = 1))
}
## get n.pca from the % of variance to conserve
if(!is.null(perc.pca)){
n.pca <- min(which(cumVar >= perc.pca))
if(perc.pca > 99.999) n.pca <- length(pcaX$eig)
if(n.pca<1) n.pca <- 1
}
if(!REDUCEDIM){
if(n.pca > ncol(pcaX$scores)) {
n.pca <- ncol(pcaX$scores)
}
}
## recompute PCA with loadings if needed
if(REDUCEDIM){
pcaX <- glPca(x, center = TRUE, scale = scale, nf=n.pca, loadings=var.contrib, matDotProd = pcaX$dotProd)
}
## keep relevant PCs - stored in XU
N <- nInd(x)
X.rank <- sum(pcaX$eig > 1e-14)
n.pca <- min(X.rank, n.pca)
if(n.pca >= N) n.pca <- N-1
U <- pcaX$loadings[, 1:n.pca, drop=FALSE] # principal axes
XU <- pcaX$scores[, 1:n.pca, drop=FALSE] # principal components
XU.lambda <- sum(pcaX$eig[1:n.pca])/sum(pcaX$eig) # sum of retained eigenvalues
names(U) <- paste("PCA-pa", 1:ncol(U), sep=".")
names(XU) <- paste("PCA-pc", 1:ncol(XU), sep=".")
## PERFORM DA ##
ldaX <- lda(XU, pop.fac, tol=1e-30) # tol=1e-30 is a kludge, but a safe (?) one to avoid fancy rescaling by lda.default
lda.dim <- sum(ldaX$svd^2 > 1e-10)
ldaX$svd <- ldaX$svd[1:lda.dim]
ldaX$scaling <- ldaX$scaling[,1:lda.dim,drop=FALSE]
if(is.null(n.da)){
barplot(ldaX$svd^2, xlab="Linear Discriminants", ylab="F-statistic", main="Discriminant analysis eigenvalues", col=heat.colors(length(levels(pop.fac))) )
cat("Choose the number discriminant functions to retain (>=1): ")
n.da <- as.integer(readLines(con = getOption('adegenet.testcon'), n = 1))
}
n.da <- min(n.da, length(levels(pop.fac))-1, n.pca, sum(ldaX$svd>1e-10)) # can't be more than K-1 disc. func., or more than n.pca
n.da <- round(n.da)
predX <- predict(ldaX, dimen=n.da)
## BUILD RESULT
res <- list()
res$n.pca <- n.pca
res$n.da <- n.da
res$tab <- XU
res$grp <- pop.fac
res$var <- XU.lambda
res$eig <- ldaX$svd^2
res$loadings <- ldaX$scaling[, 1:n.da, drop=FALSE]
res$means <- ldaX$means
res$ind.coord <-predX$x
res$grp.coord <- apply(res$ind.coord, 2, tapply, pop.fac, mean)
res$prior <- ldaX$prior
res$posterior <- predX$posterior
res$assign <- predX$class
res$call <- match.call()
## optional: store loadings of variables
if(pca.info){
res$pca.loadings <- as.matrix(U)
res$pca.cent <- glMean(x,alleleAsUnit=FALSE)
if(scale) {
res$pca.norm <- sqrt(glVar(x,alleleAsUnit=FALSE))
} else {
res$pca.norm <- rep(1, nLoc(x))
}
res$pca.eig <- pcaX$eig
}
## optional: get loadings of variables
if(var.contrib || var.loadings){
var.load <- as.matrix(U) %*% as.matrix(ldaX$scaling[,1:n.da,drop=FALSE])
if(var.contrib){
f1 <- function(x){
temp <- sum(x*x)
if(temp < 1e-12) return(rep(0, length(x)))
return(x*x / temp)
}
res$var.contr <- apply(var.load, 2, f1)
}
if(var.loadings) res$var.load <- var.load
}
class(res) <- "dapc"
return(res)
} # end dapc.genlight
######################
# Function print.dapc
######################
#' @method print dapc
#' @export
print.dapc <- function(x, ...){
cat("\t#################################################\n")
cat("\t# Discriminant Analysis of Principal Components #\n")
cat("\t#################################################\n")
cat("class: ")
cat(class(x))
cat("\n$call: ")
print(x$call)
cat("\n$n.pca:", x$n.pca, "first PCs of PCA used")
cat("\n$n.da:", x$n.da, "discriminant functions saved")
cat("\n$var (proportion of conserved variance):", round(x$var,3))
cat("\n\n$eig (eigenvalues): ")
l0 <- sum(x$eig >= 0)
cat(signif(x$eig, 4)[1:(min(5, l0))])
if (l0 > 5)
cat(" ...\n\n")
## vectors
TABDIM <- 4
if(!is.null(x$pca.loadings)){
TABDIM <- TABDIM + 3
}
sumry <- array("", c(TABDIM, 3), list(1:TABDIM, c("vector", "length", "content")))
sumry[1, ] <- c('$eig', length(x$eig), 'eigenvalues')
sumry[2, ] <- c('$grp', length(x$grp), 'prior group assignment')
sumry[3, ] <- c('$prior', length(x$prior), 'prior group probabilities')
sumry[4, ] <- c('$assign', length(x$assign), 'posterior group assignment')
if(!is.null(x$pca.loadings)){
sumry[5, ] <- c('$pca.cent', length(x$pca.cent), 'centring vector of PCA')
sumry[6, ] <- c('$pca.norm', length(x$pca.norm), 'scaling vector of PCA')
sumry[7, ] <- c('$pca.eig', length(x$pca.eig), 'eigenvalues of PCA')
}
class(sumry) <- "table"
print(sumry)
## data.frames
cat("\n")
TABDIM <- 6
if(!is.null(x$pca.loadings)){
TABDIM <- TABDIM + 1
}
if(!is.null(x$var.contr)){
TABDIM <- TABDIM + 1
}
if(!is.null(x$var.load)){
TABDIM <- TABDIM + 1
}
sumry <- array("", c(TABDIM, 4), list(1:TABDIM, c("data.frame", "nrow", "ncol", "content")))
sumry[1, ] <- c("$tab", nrow(x$tab), ncol(x$tab), "retained PCs of PCA")
sumry[2, ] <- c("$means", nrow(x$means), ncol(x$means), "group means")
sumry[3, ] <- c("$loadings", nrow(x$loadings), ncol(x$loadings), "loadings of variables")
sumry[4, ] <- c("$ind.coord", nrow(x$ind.coord), ncol(x$ind.coord), "coordinates of individuals (principal components)")
sumry[5, ] <- c("$grp.coord", nrow(x$grp.coord), ncol(x$grp.coord), "coordinates of groups")
sumry[6, ] <- c("$posterior", nrow(x$posterior), ncol(x$posterior), "posterior membership probabilities")
count <- 6
if(!is.null(x$pca.loadings)){
count <- count+1
sumry[count, ] <- c("$pca.loadings", nrow(x$pca.loadings), ncol(x$pca.loadings), "PCA loadings of original variables")
}
if(!is.null(x$var.contr)){
count <- count+1
sumry[count, ] <- c("$var.contr", nrow(x$var.contr), ncol(x$var.contr), "contribution of original variables")
}
if(!is.null(x$var.load)){
count <- count+1
sumry[count, ] <- c("$var.load", nrow(x$var.load), ncol(x$var.load), "loadings of original variables")
}
class(sumry) <- "table"
print(sumry)
## cat("\nother elements: ")
## if (length(names(x)) > 15)
## cat(names(x)[15:(length(names(x)))], "\n")
## else cat("NULL\n")
cat("\n")
} # end print.dapc
##############
## summary.dapc
##############
#' @export
#' @method summary dapc
summary.dapc <- function(object, ...){
x <- object
res <- list()
## number of dimensions
res$n.dim <- ncol(x$loadings)
res$n.pop <- length(levels(x$grp))
## assignment success
temp <- as.character(x$grp)==as.character(x$assign)
res$assign.prop <- mean(temp)
res$assign.per.pop <- tapply(temp, x$grp, mean)
## group sizes
res$prior.grp.size <- table(x$grp)
res$post.grp.size <- table(x$assign)
return(res)
} # end summary.dapc
##############
## scatter.dapc
##############
#' @importFrom vegan orditorp
#'
#' @method scatter dapc
#' @export
scatter.dapc <- function(x, xax = 1, yax = 2, grp = x$grp,
col = seasun(length(levels(grp))),
pch = 20, bg = "white", solid = .7,
scree.da = TRUE, scree.pca = FALSE,
posi.da = "bottomright",
posi.pca = "bottomleft",
bg.inset = "white",
ratio.da = .25, ratio.pca = .25,
inset.da = 0.02, inset.pca = 0.02, inset.solid = .5,
onedim.filled = TRUE, mstree = FALSE,
lwd = 1, lty = 1, segcol = "black",
legend = FALSE, posi.leg = "topright",
cleg = 1, txt.leg = levels(grp),
cstar = 1, cellipse = 1.5, axesell = FALSE,
label = levels(grp), clabel = 1, xlim = NULL, ylim = NULL,
grid = FALSE, addaxes = TRUE, origin = c(0,0),
include.origin = TRUE, sub = "", csub = 1, possub = "bottomleft",
cgrid = 1, pixmap = NULL, contour = NULL,
area = NULL, label.inds = NULL, ...){
ONEDIM <- xax==yax | ncol(x$ind.coord)==1
## recycle color and pch
col <- rep(col, length(levels(grp)))
pch <- rep(pch, length(levels(grp)))
col <- transp(col, solid)
bg.inset <- transp(bg.inset, inset.solid)
## handle grp
if(is.null(grp)){
grp <- x$grp
}
## handle xax or yax NULL
if(is.null(xax)||is.null(yax)){
xax <- 1L
yax <- ifelse(ncol(x$ind.coord)==1L, 1L, 2L)
ONEDIM <- TRUE
}
## handle 1 dimensional plot
if(!ONEDIM){
## set par
opar <- par(mar = par("mar"))
par(mar = c(0.1, 0.1, 0.1, 0.1), bg=bg)
on.exit(par(opar))
axes <- c(xax,yax)
## basic empty plot
s.class(x$ind.coord[,axes], fac = grp, col = col, cpoint = 0,
cstar = cstar, cellipse = cellipse, axesell = axesell,
label = label, clabel = clabel, xlim = xlim, ylim = ylim,
grid = grid, addaxes = addaxes, origin = origin,
include.origin = include.origin,
sub = sub, csub = csub, possub = possub,
cgrid = cgrid, pixmap = pixmap,
contour = contour, area = area)
## add points
colfac <- pchfac <- grp
levels(colfac) <- col
levels(pchfac) <- pch
colfac <- as.character(colfac)
pchfac <- as.character(pchfac)
if(is.numeric(col)) colfac <- as.numeric(colfac)
if(is.numeric(pch)) pchfac <- as.numeric(pchfac)
points(x$ind.coord[,xax],
x$ind.coord[,yax],
col = colfac, pch = pchfac, ...)
s.class(x$ind.coord[,axes], fac = grp, col = col, cpoint = 0,
add.plot=TRUE, cstar = cstar, cellipse = cellipse,
axesell = axesell, label = label,clabel = clabel,
xlim = xlim, ylim = ylim, grid = grid,
addaxes = addaxes, origin = origin,
include.origin = include.origin,
sub = sub, csub = csub, possub = possub,
cgrid = cgrid, pixmap = pixmap,
contour = contour, area = area)
## Add labels of individuals if specified. Play around with "air" to get
## a satisfactory result.
if (!is.null(label.inds) & is.list(label.inds)) {
appendList <- function (x, val) {
# recursevly "bind" a list into a longer list,
# from http://stackoverflow.com/a/9519964/322912
stopifnot(is.list(x), is.list(val))
xnames <- names(x)
for (v in names(val)) {
x[[v]] <- if (v %in% xnames && is.list(x[[v]]) && is.list(val[[v]]))
appendList(x[[v]], val[[v]])
else c(x[[v]], val[[v]])
}
x
}
do.call("orditorp",
c(appendList(list(x = x$ind.coord[, c(xax, yax)],
display = "species"),
label.inds)))
}
## add minimum spanning tree if needed
if(mstree){
meanposi <- apply(x$tab,2, tapply, grp, mean)
D <- dist(meanposi)^2
tre <- ade4::mstree(D)
x0 <- x$grp.coord[tre[,1], axes[1]]
y0 <- x$grp.coord[tre[,1], axes[2]]
x1 <- x$grp.coord[tre[,2], axes[1]]
y1 <- x$grp.coord[tre[,2], axes[2]]
segments(x0, y0, x1, y1, lwd = lwd, lty = lty, col = segcol)
}
} else {
## set screeplot of DA to FALSE (just 1 bar)
scree.da <- FALSE
## get plotted axis
if(ncol(x$ind.coord)==1) {
pcLab <- 1
} else{
pcLab <- xax
}
## get densities
ldens <- tapply(x$ind.coord[,pcLab], grp, density)
allx <- unlist(lapply(ldens, function(e) e$x))
ally <- unlist(lapply(ldens, function(e) e$y))
par(bg=bg)
plot(allx, ally, type = "n",
xlab = paste("Discriminant function", pcLab),
ylab = "Density")
for(i in 1:length(ldens)){
if(!onedim.filled) {
lines(ldens[[i]]$x, ldens[[i]]$y, col = col[i], lwd = 2) # add lines
} else {
polygon(c(ldens[[i]]$x, rev(ldens[[i]]$x)),
c(ldens[[i]]$y, rep(0,length(ldens[[i]]$x))),
col = col[i], lwd = 2, border = col[i]) # add lines
}
points(x = x$ind.coord[grp==levels(grp)[i], pcLab],
y = rep(0, sum(grp==levels(grp)[i])),
pch = "|", col = col[i]) # add points for indiv
}
}
## ADD INSETS ##
## group legend
if(legend){
## add a legend
temp <- list(...)$cex
if(is.null(temp)) temp <- 1
if(ONEDIM | temp<0.5 | all(pch == "")) {
legend(posi.leg, fill = col, legend = txt.leg,
cex = cleg, bg = bg.inset)
} else {
legend(posi.leg, col = col, legend = txt.leg, cex = cleg,
bg = bg.inset, pch = pch, pt.cex = temp)
}
}
## eigenvalues discriminant analysis
if(scree.da && ratio.da>.01) {
inset <- function(){
myCol <- rep("white", length(x$eig))
myCol[1:x$n.da] <- "grey"
myCol[c(xax, yax)] <- "black"
myCol <- transp(myCol, inset.solid)
barplot(x$eig, col=myCol, xaxt="n", yaxt="n", ylim=c(0, x$eig[1]*1.1))
mtext(side=3, "DA eigenvalues", line=-1.2, adj=.8)
box()
}
add.scatter(inset(), posi = posi.da, ratio = ratio.da,
bg.col = bg.inset, inset = inset.da)
}
## eigenvalues PCA
if(scree.pca && !is.null(x$pca.eig) && ratio.pca>.01) {
inset <- function(){
temp <- 100* cumsum(x$pca.eig) / sum(x$pca.eig)
myCol <- rep(c("black","grey"), c(x$n.pca, length(x$pca.eig)))
myCol <- transp(myCol, inset.solid)
plot(temp, col=myCol, ylim=c(0,115),
type="h", xaxt="n", yaxt="n", xlab="", ylab="", lwd=2)
mtext(side=3, "PCA eigenvalues", line=-1.2, adj=.1)
}
add.scatter(inset(), posi = posi.pca, ratio = ratio.pca,
bg.col = bg.inset, inset = inset.pca)
}
return(invisible(match.call()))
} # end scatter.dapc
############
## assignplot
############
assignplot <- function(x, only.grp=NULL, subset=NULL, new.pred=NULL, cex.lab=.75, pch=3){
if(!inherits(x, "dapc")) stop("x is not a dapc object")
## handle data from predict.dapc ##
if(!is.null(new.pred)){
n.new <- length(new.pred$assign)
x$grp <- c(as.character(x$grp), rep("unknown", n.new))
x$assign <- c(as.character(x$assign), as.character(new.pred$assign))
x$posterior <- rbind(x$posterior, new.pred$posterior)
}
## treat other arguments ##
if(!is.null(only.grp)){
only.grp <- as.character(only.grp)
ori.grp <- as.character(x$grp)
x$grp <- x$grp[only.grp==ori.grp]
x$assign <- x$assign[only.grp==ori.grp]
x$posterior <- x$posterior[only.grp==ori.grp, , drop=FALSE]
} else if(!is.null(subset)){
x$grp <- x$grp[subset]
x$assign <- x$assign[subset]
x$posterior <- x$posterior[subset, , drop=FALSE]
}
##table.paint(x$posterior, col.lab=ori.grp, ...)
## symbols(x$posterior)
## FIND PLOT PARAMETERS
n.grp <- ncol(x$posterior)
n.ind <- nrow(x$posterior)
Z <- t(x$posterior)
Z <- Z[,ncol(Z):1,drop=FALSE ]
image(x=1:n.grp, y=seq(.5, by=1, le=n.ind), Z, col=rev(heat.colors(100)), yaxt="n", ylab="", xaxt="n", xlab="Clusters")
axis(side=1, at=1:n.grp,tick=FALSE, labels=colnames(x$posterior))
axis(side=2, at=seq(.5, by=1, le=n.ind), labels=rev(rownames(x$posterior)), las=1, cex.axis=cex.lab)
abline(h=1:n.ind, col="lightgrey")
abline(v=seq(0.5, by=1, le=n.grp))
box()
newGrp <- colnames(x$posterior)
x.real.coord <- rev(match(x$grp, newGrp))
y.real.coord <- seq(.5, by=1, le=n.ind)
points(x.real.coord, y.real.coord, col="deepskyblue2", pch=pch)
return(invisible(match.call()))
} # end assignplot
###############
## a.score
###############
a.score <- function(x, n.sim=10, ...){
if(!inherits(x,"dapc")) stop("x is not a dapc object")
## perform DAPC based on permuted groups
lsim <- lapply(1:n.sim, function(i) summary(dapc(x$tab, sample(x$grp), n.pca=x$n.pca, n.da=x$n.da))$assign.per.pop)
sumry <- summary(x)
## get the a-scores
f1 <- function(Pt, Pf){
tol <- 1e-7
##res <- (Pt-Pf) / (1-Pf)
##res[Pf > (1-tol)] <- 0
res <- Pt-Pf
return(res)
}
lscores <- lapply(lsim, function(e) f1(sumry$assign.per.pop, e))
## make a table of a-scores
tab <- data.frame(lscores)
colnames(tab) <- paste("sim", 1:n.sim, sep=".")
rownames(tab) <- names(sumry$assign.per.pop)
tab <- t(as.matrix(tab))
## make result
res <- list()
res$tab <- tab
res$pop.score <- apply(tab, 2, mean)
res$mean <- mean(tab)
return(res)
} # end a.score
##############
## optim.a.score
##############
optim.a.score <- function(x, n.pca=1:ncol(x$tab), smart=TRUE, n=10, plot=TRUE,
n.sim=10, n.da=length(levels(x$grp)), ...){
## A FEW CHECKS ##
if(!inherits(x,"dapc")) stop("x is not a dapc object")
if(max(n.pca)>ncol(x$tab)) {
n.pca <- min(n.pca):ncol(x$tab)
}
if(n.da>length(levels(x$grp))){
n.da <- min(n.da):length(levels(x$grp))
}
pred <- NULL
if(length(n.pca)==1){
n.pca <- 1:n.pca
}
if(length(n.da)==1){
n.da <- 1:n.da
}
## AUXILIARY FUNCTION ##
f1 <- function(ndim){
temp <- dapc(x$tab[,1:ndim,drop=FALSE], x$grp, n.pca=ndim, n.da=x$n.da)
a.score(temp, n.sim=n.sim)$pop.score
}
## SMART: COMPUTE A FEW VALUES, PREDICT THE BEST PICK ##
if(smart){
## if(!require(stats)) stop("the package stats is required for 'smart' option")
o.min <- min(n.pca)
o.max <- max(n.pca)
n.pca <- pretty(n.pca, n) # get evenly spaced nb of retained PCs
n.pca <- n.pca[n.pca>0 & n.pca<=ncol(x$tab)]
if(!any(o.min==n.pca)) n.pca <- c(o.min, n.pca) # make sure range is OK
if(!any(o.max==n.pca)) n.pca <- c(o.max, n.pca) # make sure range is OK
lres <- lapply(n.pca, f1)
names(lres) <- n.pca
means <- sapply(lres, mean)
sp1 <- smooth.spline(n.pca, means) # spline smoothing
pred <- predict(sp1, x=1:max(n.pca))
best <- pred$x[which.max(pred$y)]
} else { ## DO NOT TRY TO BE SMART ##
lres <- lapply(n.pca, f1)
names(lres) <- n.pca
best <- which.max(sapply(lres, mean))
means <- sapply(lres, mean)
}
## MAKE FINAL OUTPUT ##
res <- list()
res$pop.score <- lres
res$mean <- means
if(!is.null(pred)) res$pred <- pred
res$best <- best
## PLOTTING (OPTIONAL) ##
if(plot){
if(smart){
boxplot(lres, at=n.pca, col="gold", xlab="Number of retained PCs", ylab="a-score", xlim=range(n.pca)+c(-1,1), ylim=c(-.1,1.1))
lines(pred, lwd=3)
points(pred$x[best], pred$y[best], col="red", lwd=3)
title("a-score optimisation - spline interpolation")
mtext(paste("Optimal number of PCs:", res$best), side=3)
} else {
myCol <- rep("gold", length(lres))
myCol[best] <- "red"
boxplot(lres, at=n.pca, col=myCol, xlab="Number of retained PCs", ylab="a-score", xlim=range(n.pca)+c(-1,1), ylim=c(-.1,1.1))
lines(n.pca, sapply(lres, mean), lwd=3, type="b")
myCol <- rep("black", length(lres))
myCol[best] <- "red"
points(n.pca, res$mean, lwd=3, col=myCol)
title("a-score optimisation - basic search")
mtext(paste("Optimal number of PCs:", res$best), side=3)
}
}
return(res)
} # end optim.a.score
#############
## as.lda.dapc
#############
as.lda <- function(...){
UseMethod("as.lda")
}
#' @method as.lda dapc
#' @export
as.lda.dapc <- function(x, ...){
if(!inherits(x,"dapc")) stop("x is not a dapc object")
res <- list()
res$N <- nrow(res$ind.coord)
res$call <- match.call()
res$counts <- as.integer(table(x$grp))
res$lev <- names(res$counts) <- levels(x$grp)
res$means <- x$means
res$prior <- x$prior
res$scaling <- x$loadings
res$svd <- sqrt(x$eig)
class(res) <- "lda"
return(res)
} # end as.lda.dapc
##############
## predict.dapc
##############
#' @method predict dapc
#' @export
predict.dapc <- function(object, newdata, prior = object$prior, dimen,
method = c("plug-in", "predictive", "debiased"), ...){
if(!inherits(object,"dapc")) stop("x is not a dapc object")
method <- match.arg(method)
x <- as.lda(object)
## HANDLE NEW DATA ##
if(!missing(newdata)){
## make a few checks
if(is.null(object$pca.loadings)) stop("DAPC object does not contain loadings of original variables. \nPlease re-run DAPC using 'pca.loadings=TRUE'.")
## We need to convert the data as they were converted during the analysis. Behaviour is:
## - genind: allele frequencies, missing data = mean
## - genlight: allele frequencies, missing data = mean
if (is.genind(newdata)) { # genind object
newdata <- tab(newdata, freq = TRUE, NA.method = "mean")
} else if (inherits(newdata, "genlight")) { # genlight object
newdata <- as.matrix(newdata) / ploidy(newdata)
} else { # any other type of object
newdata <- as.matrix(newdata)
}
if(ncol(newdata) != nrow(object$pca.loadings)) stop("Number of variables in newdata does not match original data.")
## centre/scale data
for(i in 1:nrow(newdata)){ # this is faster for large, flat matrices)
newdata[i,] <- (newdata[i,] - object$pca.cent) / object$pca.norm
}
newdata[is.na(newdata)] <- 0
## project as supplementary individuals
XU <- newdata %*% as.matrix(object$pca.loadings)
} else {
XU <- object$tab
}
## FORCE IDENTICAL VARIABLE NAMES ##
colnames(XU) <- colnames(object$tab)
## HANDLE DIMEN ##
if(!missing(dimen)){
if(dimen > object$n.da) stop(paste("Too many dimensions requested. \nOnly", object$n.da, "discriminant functions were saved in DAPC."))
} else {
dimen <- object$n.da
}
## CALL PREDICT.LDA ##
temp <- predict(x, XU, prior, dimen, method, ...)
## FORMAT OUTPUT ##
res <- list()
res$assign <- temp$class
res$posterior <- temp$posterior
res$ind.scores <- temp$x
return(res)
} # end predict.dapc
## #############
## ## discriVal
## #############
## discriVal <- function (x, ...) UseMethod("discriVal")
## discriVal.data.frame <- function(x, grp, n.pca.max, n.da=NULL, center=TRUE, scale=FALSE, n.pca=NULL, ...){
## ## CHECKS ##
## grp <- factor(grp)
## n.pca <- n.pca[n.pca>0]
## if(is.null(n.da)) {
## n.da <- length(levels(grp))-1
## }
## ## GET FULL PCA ##
## if(missing(n.pca.max)) n.pca.max <- min(dim(x))
## pcaX <- dudi.pca(x, nf=n.pca.max, scannf=FALSE, center=center, scale=scale)
## n.pca.max <- min(n.pca.max,pcaX$rank)
## ## DETERMINE N.PCA IF NEEDED ##
## if(is.null(n.pca)){
## n.pca <- round(pretty(1:n.pca.max,10))
## }
## n.pca <- n.pca[n.pca>0 & n.pca<n.pca.max]
## ## FUNCTION GETTING THE TOTAL DISCRIMINATION (SUM OF EIGENVALUES) FOR ONE GIVEN NB OF PCA PCs ##
## ## n.pca is a number of retained PCA PCs
## get.totdiscr <- function(n.pca){
## temp.dapc <- suppressWarnings(dapc(x, grp, n.pca=n.pca, n.da=n.da, dudi=pcaX))
## return(sum(temp.dapc$eig))
## }
## ## GET %SUCCESSFUL OF ACCURATE PREDICTION FOR ALL VALUES ##
## res.all <- sapply(n.pca, get.totdiscr)
## res <- data.frame(n.pca=n.pca, success=res.all)
## return(res)
## } # end discriVal.data.frame
## discriVal.matrix <- discriVal.data.frame
## There's a bunch of problems down there, commenting it for nowé
## xval.dapc <- function(object, n.pca, n.da, training.set = 90, ...){
## training.set = training.set/100
## kept.id <- unlist(tapply(1:nInd(object), pop(object), function(e) {pop.size = length(e); pop.size.train = round(pop.size * training.set); sample(e, pop.size.train, replace=FALSE)})) # this can't work: nInd/pop not defined for DAPC objects
## training <- object[kept.id]
## validating <- object[-kept.id]
## post = vector(mode = 'list', length = n.pca)
## asgn = vector(mode = 'list', length = n.pca)
## ind = vector(mode = 'list', length = n.pca)
## mtch = vector(mode = 'list', length = n.pca)
## for(i in 1:n.pca){
## dapc.base = dapc(training, n.pca = i, n.da = 15) # Why 15??
## dapc.p = predict.dapc(dapc.base, newdata = validating)
## match.prp = mean(as.character(dapc.p$assign)==as.character(pop(validating)))
## post[[i]] = dapc.p$posterior
## asgn[[i]] = dapc.p$assign
## ind[[i]] = dapc.p$ind.score
## mtch[[i]] = match.prp
## }
## res = list(assign = asgn, posterior = post, ind.score = ind, match.prp = mtch)
## return(res)
## } # end of xval.dapc
## xval.genind <- function(object, n.pca, n.da, training.set = 90, ...){
## res = xval.dapc(object = object, n.pca = n.pca, n.da = n.da, training.set = training.set)
## return(res)
## }
## ###############
## ## randtest.dapc
## ###############
## ##randtest.dapc <- function(x, nperm = 999, ...){
## ##} # end randtest.dapc
######## TESTS IN R #######
## TEST PREDICT.DAPC ##
## data(sim2pop)
## temp <- seppop(sim2pop)
## temp <- lapply(temp, function(e) hybridize(e,e,n=30)) # force equal pop sizes
## hyb <- hybridize(temp[[1]], temp[[2]], n=30)
## newdat <- repool(temp[[1]], temp[[2]], hyb)
## pop(newdat) <- rep(c("pop A", "popB", "hyb AB"), c(30,30,30))
## ##dapc1 <- dapc(newdat[1:61],n.pca=10,n.da=1)
## dapc1 <- dapc(newdat[1:60],n.pca=2,n.da=1)
## scatter(dapc1)
## hyb.pred <- predict(dapc1, newdat[61:90])
## scatter(dapc1)
## points(hyb.pred$ind.scores, rep(.1, 30))
## assignplot(dapc1, new.pred=hyb.pred)
## title("30 indiv popA, 30 indiv pop B, 30 hybrids")
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.