##########
## glSum
##########
## computes column (locus) sums,
## ignoring NAs
##
glSum <- function(x, alleleAsUnit=TRUE, useC=FALSE){
if(!inherits(x, "genlight")) stop("x is not a genlight object")
if(useC){
## use ploidy (sum absolute frequencies)
if(alleleAsUnit){
vecbyte <- unlist(lapply(x@gen, function(e) e$snp))
nbVec <- sapply(x@gen, function(e) length(e$snp))
nbNa <- sapply(NA.posi(x), length)
naPosi <- unlist(NA.posi(x))
res <- .C("GLsumInt", vecbyte, nbVec, length(x@gen[[1]]@snp[[1]]), nbNa, naPosi, nInd(x), nLoc(x), ploidy(x),
integer(nLoc(x)), PACKAGE="adegenet")[[9]]
} else {
## sum relative frequencies
vecbyte <- unlist(lapply(x@gen, function(e) e$snp))
nbVec <- sapply(x@gen, function(e) length(e$snp))
nbNa <- sapply(NA.posi(x), length)
naPosi <- unlist(NA.posi(x))
res <- .C("GLsumFreq", vecbyte, nbVec, length(x@gen[[1]]@snp[[1]]), nbNa, naPosi, nInd(x), nLoc(x), ploidy(x),
double(nLoc(x)), PACKAGE="adegenet")[[9]]
}
} else {
## use ploidy (sum absolute frequencies)
if(alleleAsUnit){
res <- integer(nLoc(x))
for(e in x@gen){
temp <- as.integer(e)
temp[is.na(temp)] <- 0L
res <- res + temp
}
} else {
## sum relative frequencies
res <- numeric(nLoc(x))
myPloidy <- ploidy(x)
for(i in 1:nInd(x)){
temp <- as.integer(x@gen[[i]]) / myPloidy[i]
temp[is.na(temp)] <- 0
res <- res + temp
}
}
}
names(res) <- locNames(x)
return(res)
} # glSum
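## Example sketch (commented; mirrors the tests at the end of this file):
## x <- new("genlight", list(c(0,0,1,1,0,0), c(1,1,1,0,0,1), c(2,1,1,1,1,NA)))
## glSum(x)                              # allele counts per locus, NAs treated as 0
## glSum(x, alleleAsUnit=FALSE)          # sums of within-individual allele frequencies
## colSums(as.matrix(x), na.rm=TRUE)     # should equal glSum(x)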
##########
## glNA
##########
## counts the number of NAs per column (locus)
##
## if alleleAsUnit, then effective is the number of alleles sampled (sum(ploidy(x)))
## otherwise, effective is simply the number of individuals (nInd(x))
glNA <- function(x, alleleAsUnit=TRUE){
if(!inherits(x, "genlight")) stop("x is not a genlight object")
## DEFAULT, VECTOR-WISE PROCEDURE ##
res <- integer(nLoc(x))
temp <- NA.posi(x)
## NAs in allele sampling
if(alleleAsUnit){
for(i in 1:length(temp)){
if(length(temp[[i]])>0){
res[temp[[i]]] <- res[temp[[i]]] + ploidy(x)[i]
}
}
} else { ## NAs amongst individuals
for(e in temp){
if(length(e)>0){
res[e] <- res[e] + 1
}
}
}
names(res) <- locNames(x)
return(res)
} # glNA
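## Example sketch (commented; same toy object as above):
## x <- new("genlight", list(c(0,0,1,1,0,0), c(1,1,1,0,0,1), c(2,1,1,1,1,NA)))
## glNA(x)                           # missing alleles per locus, weighted by ploidy
## glNA(x, alleleAsUnit=FALSE)       # number of individuals with a NA at each locus
## colSums(is.na(as.matrix(x)))      # should equal glNA(x, alleleAsUnit=FALSE)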
##########
## glMean
##########
## computes SNP means
## takes NAs into account
##
glMean <- function(x, alleleAsUnit=TRUE){
if(!inherits(x, "genlight")) stop("x is not a genlight object")
## DEFAULT, VECTOR-WISE PROCEDURE ##
if(alleleAsUnit){ # use alleles
N <- sum(ploidy(x)) - glNA(x, alleleAsUnit=TRUE)
res <- glSum(x, alleleAsUnit=TRUE)/N
} else { # use relative frequencies of individuals
N <- nInd(x) - glNA(x, alleleAsUnit=FALSE)
res <- glSum(x, alleleAsUnit=FALSE)/N
}
names(res) <- locNames(x)
return(res)
} # glMean
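## Example sketch: glMean is, by construction, glSum divided by the effective
## sample size (alleles or individuals, depending on alleleAsUnit):
## x <- new("genlight", list(c(0,0,1,1,0,0), c(1,1,1,0,0,1), c(2,1,1,1,1,NA)))
## all.equal(glMean(x), glSum(x) / (sum(ploidy(x)) - glNA(x)))                # TRUE
## all.equal(glMean(x, FALSE), glSum(x, FALSE) / (nInd(x) - glNA(x, FALSE)))  # TRUE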
########
## glVar
########
## computes SNP variances
## takes NAs into account
##
glVar <- function(x, alleleAsUnit=TRUE){
if(!inherits(x, "genlight")) stop("x is not a genlight object")
## DEFAULT, VECTOR-WISE PROCEDURE ##
res <- numeric(nLoc(x))
myPloidy <- ploidy(x)
if(alleleAsUnit){ # use alleles
N <- sum(ploidy(x)) - glNA(x, alleleAsUnit=TRUE)
xbar <- glMean(x, alleleAsUnit=TRUE)
for(i in 1:nInd(x)){
temp <- (as.integer(x@gen[[i]])/myPloidy[i] - xbar)^2
temp[is.na(temp)] <- 0
res <- res + temp*myPloidy[i]
}
res <- res/N
} else { # use relative frequencies of individuals
N <- nInd(x) - glNA(x, alleleAsUnit=FALSE)
xbar <- glMean(x, alleleAsUnit=FALSE)
for(i in 1:nInd(x)){
temp <- (as.integer(x@gen[[i]])/myPloidy[i] - xbar)^2
temp[is.na(temp)] <- 0L
res <- res + temp
}
res <- res/N
}
names(res) <- locNames(x)
return(res)
} # glVar
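## Example sketch (uniform ploidy, no NAs): glVar should match the per-locus
## variance computed with an n (not n-1) denominator:
## x <- new("genlight", list(c(0,0,1,1,0,1), c(1,1,1,0,0,1), c(0,0,0,1,1,1)))
## f1 <- function(e) mean((e - mean(e, na.rm=TRUE))^2, na.rm=TRUE)
## all.equal(glVar(x), apply(as.matrix(x), 2, f1))    # expected TRUE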
#############
## glDotProd
#############
## computes all pairwise dot products
## between (optionally) centred/scaled
## vectors of SNPs
glDotProd <- function(x, center=FALSE, scale=FALSE, alleleAsUnit=FALSE,
parallel=FALSE, n.cores=NULL){
if(!inherits(x, "genlight")) stop("x is not a genlight object")
## SOME CHECKS ##
## if(parallel && !require(parallel)) stop("parallel package requested but not installed")
if(parallel && is.null(n.cores)){
n.cores <- parallel::detectCores()
}
## STORE USEFUL INFO ##
N <- nInd(x)
ind.names <- indNames(x)
if(!parallel){ # DO NOT USE MULTIPLE CORES
## GET INPUTS TO C PROCEDURE ##
if(center){
mu <- glMean(x,alleleAsUnit=alleleAsUnit)
} else {
mu <- rep(0, nLoc(x))
}
if(scale){
s <- sqrt(glVar(x,alleleAsUnit=alleleAsUnit))
if(any(s<1e-10)) {
warning("Null variances have been detected; corresponding alleles won't be standardized.")
}
} else {
s <- rep(1, nLoc(x))
}
vecbyte <- unlist(lapply(x@gen, function(e) e$snp))
nbVec <- sapply(x@gen, function(e) length(e$snp))
nbNa <- sapply(NA.posi(x), length)
naPosi <- unlist(NA.posi(x))
lowerTriSize <- (nInd(x)*(nInd(x)-1))/2
resSize <- lowerTriSize + nInd(x)
## CALL C FUNCTION ##
temp <- .C("GLdotProd", vecbyte, nbVec, length(x@gen[[1]]@snp[[1]]), nbNa, naPosi, nInd(x), nLoc(x), ploidy(x),
as.double(mu), as.double(s), as.integer(!alleleAsUnit), double(resSize), PACKAGE="adegenet")[[12]]
} else { # USE MULTIPLE CORES
x <- seploc(x, n.block = n.cores) # one block per core (x is now a list of genlight)
temp <- list()
i <- 0
for(block in x){
i <- i+1
## GET INPUTS TO C PROCEDURE ##
if(center){
mu <- glMean(block,alleleAsUnit=alleleAsUnit)
} else {
mu <- rep(0, nLoc(block))
}
if(scale){
s <- sqrt(glVar(block,alleleAsUnit=alleleAsUnit))
if(any(s<1e-10)) {
warning("Null variances have been detected; corresponding alleles won't be standardized.")
}
} else {
s <- rep(1, nLoc(block))
}
vecbyte <- unlist(lapply(block@gen, function(e) e$snp))
nbVec <- sapply(block@gen, function(e) length(e$snp))
nbNa <- sapply(NA.posi(block), length)
naPosi <- unlist(NA.posi(block))
lowerTriSize <- (nInd(block)*(nInd(block)-1))/2
resSize <- lowerTriSize + nInd(block)
## CALL C FUNCTION ##
temp[[i]] <- .C("GLdotProd", vecbyte, nbVec, length(block@gen[[1]]@snp[[1]]), nbNa, naPosi, nInd(block), nLoc(block), ploidy(block),
as.double(mu), as.double(s), as.integer(!alleleAsUnit), double(resSize), PACKAGE="adegenet")[[12]]
}
## POOL BLOCK RESULTS TOGETHER ##
temp <- Reduce("+", temp)
}
res <- temp[1:lowerTriSize]
attr(res,"Size") <- N
attr(res,"Diag") <- FALSE
attr(res,"Upper") <- FALSE
class(res) <- "dist"
res <- as.matrix(res)
diag(res) <- temp[(lowerTriSize+1):length(temp)]
colnames(res) <- rownames(res) <- ind.names
return(res)
} # end glDotProd
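## Example sketch: for binary data with uniform ploidy and no NAs, centred dot
## products should reproduce the cross-products of the centred data matrix
## (see also the dot-product tests at the end of this file):
## M <- matrix(sample(c(0,1), 20*100, replace=TRUE), nrow=20)
## x <- new("genlight", M)
## Mc <- sweep(M, 2, colMeans(M))                       # centre columns
## res1 <- glDotProd(x, center=TRUE, alleleAsUnit=FALSE)
## res2 <- Mc %*% t(Mc)
## max(abs(res1 - res2))                                # expected to be ~0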
########
## glPca
########
##
## PCA for genlight objects
##
glPca <- function(x, center=TRUE, scale=FALSE, nf=NULL, loadings=TRUE, alleleAsUnit=FALSE,
useC=TRUE, parallel=FALSE, n.cores=NULL,
returnDotProd=FALSE, matDotProd=NULL){
if(!inherits(x, "genlight")) stop("x is not a genlight object")
## COMPUTE MEANS AND VARIANCES ##
if(center) {
vecMeans <- glMean(x, alleleAsUnit=alleleAsUnit)
if(any(is.na(vecMeans))) stop("NAs detected in the vector of means")
}
if(scale){
vecVar <- glVar(x, alleleAsUnit=alleleAsUnit)
if(any(is.na(vecVar))) stop("NAs detected in the vector of variances")
}
myPloidy <- ploidy(x)
## NEED TO COMPUTE DOT PRODUCTS ##
if(is.null(matDotProd)){
## == if non-C code is used ==
if(!useC){
## if(parallel && !require(parallel)) stop("parallel package requested but not installed")
if(parallel && is.null(n.cores)){
n.cores <- parallel::detectCores()
}
## COMPUTE DOT PRODUCTS BETWEEN GENOTYPES ##
## for speed, a dedicated function is defined for each centring/scaling combination
## NO CENTRING / NO SCALING
if(!center & !scale){
dotProd <- function(a,b, ploid.a, ploid.b){ # a and b are two SNPbin objects
a <- as.integer(a) / ploid.a
a[is.na(a)] <- 0
b <- as.integer(b) / ploid.b
b[is.na(b)] <- 0
return(sum( a*b, na.rm=TRUE))
}
}
## CENTRING / NO SCALING
if(center & !scale){
dotProd <- function(a,b, ploid.a, ploid.b){ # a and b are two SNPbin objects
a <- as.integer(a) / ploid.a
a[is.na(a)] <- vecMeans[is.na(a)]
b <- as.integer(b) / ploid.b
b[is.na(b)] <- vecMeans[is.na(b)]
return(sum( (a-vecMeans) * (b-vecMeans), na.rm=TRUE) )
}
}
## NO CENTRING / SCALING (odd option...)
if(!center & scale){
dotProd <- function(a,b, ploid.a, ploid.b){ # a and b are two SNPbin objects
a <- as.integer(a) / ploid.a
a[is.na(a)] <- 0
b <- as.integer(b) / ploid.b
b[is.na(b)] <- 0
return(sum( (a*b)/vecVar, na.rm=TRUE))
}
}
## CENTRING / SCALING
if(center & scale){
dotProd <- function(a,b, ploid.a, ploid.b){ # a and b are two SNPbin objects
a <- as.integer(a) / ploid.a
a[is.na(a)] <- vecMeans[is.na(a)]
b <- as.integer(b) / ploid.b
b[is.na(b)] <- vecMeans[is.na(b)]
return( sum( ((a-vecMeans)*(b-vecMeans))/vecVar, na.rm=TRUE ) )
}
}
## COMPUTE ALL POSSIBLE DOT PRODUCTS (XX^T / n) ##
allComb <- combn(1:nInd(x), 2)
if(parallel){
allProd <- unlist(parallel::mclapply(1:ncol(allComb), function(i) dotProd(x@gen[[allComb[1,i]]], x@gen[[allComb[2,i]]], myPloidy[allComb[1,i]], myPloidy[allComb[2,i]]),
mc.cores=n.cores, mc.silent=TRUE, mc.cleanup=TRUE, mc.preschedule=FALSE))
} else {
allProd <- unlist(lapply(1:ncol(allComb), function(i) dotProd(x@gen[[allComb[1,i]]], x@gen[[allComb[2,i]]], myPloidy[allComb[1,i]], myPloidy[allComb[2,i]]) ))
}
allProd <- allProd / nInd(x) # assume uniform weights
## shape result as a matrix
attr(allProd,"Size") <- nInd(x)
attr(allProd,"Diag") <- FALSE
attr(allProd,"Upper") <- FALSE
class(allProd) <- "dist"
allProd <- as.matrix(allProd)
## compute the diagonal
if(parallel){
temp <- unlist(parallel::mclapply(1:nInd(x), function(i) dotProd(x@gen[[i]], x@gen[[i]], myPloidy[i], myPloidy[i]),
mc.cores=n.cores, mc.silent=TRUE, mc.cleanup=TRUE, mc.preschedule=FALSE))/nInd(x)
} else {
temp <- unlist(lapply(1:nInd(x), function(i) dotProd(x@gen[[i]], x@gen[[i]], myPloidy[i], myPloidy[i]) ))/nInd(x)
}
diag(allProd) <- temp
} else { # === use C computations ====
allProd <- glDotProd(x, center=center, scale=scale, alleleAsUnit=alleleAsUnit, parallel=parallel, n.cores=n.cores)/nInd(x)
}
} else { # END NEED TO COMPUTE DOTPROD
if(!all(dim(matDotProd)==nInd(x))) stop("matDotProd has wrong dimensions.")
allProd <- matDotProd
}
## PERFORM THE ANALYSIS ##
## eigenanalysis
eigRes <- eigen(allProd, symmetric=TRUE, only.values=FALSE)
rank <- sum(eigRes$values > 1e-12)
eigRes$values <- eigRes$values[1:rank]
eigRes$vectors <- eigRes$vectors[, 1:rank, drop=FALSE]
## scan nb of axes retained
if(is.null(nf)){
barplot(eigRes$values, main="Eigenvalues", col=heat.colors(rank))
cat("Select the number of axes: ")
nf <- as.integer(readLines(con = getOption('adegenet.testcon'), n = 1))
}
## rescale PCs
res <- list()
res$eig <- eigRes$values
nf <- min(nf, sum(res$eig>1e-10))
##res$matprod <- allProd # for debugging
## use: li = XQU = V\Lambda^(1/2)
eigRes$vectors <- eigRes$vectors * sqrt(nInd(x)) # D-normalize vectors
res$scores <- sweep(eigRes$vectors[, 1:nf, drop=FALSE],2, sqrt(eigRes$values[1:nf]), FUN="*")
## GET LOADINGS ##
## need to decompose X^TDV into a sum of n matrices of dim p*r
## but only two such matrices are represented at a time
if(loadings){
if(scale) {
vecSd <- sqrt(vecVar)
}
res$loadings <- matrix(0, nrow=nLoc(x), ncol=nf) # create empty matrix
## use: c1 = X^TDV
## and X^TV = A_1 + ... + A_n
## with A_k = X_[k-]^T v[k-]
for(k in 1:nInd(x)){
temp <- as.integer(x@gen[[k]]) / myPloidy[k]
if(center) {
temp[is.na(temp)] <- vecMeans[is.na(temp)]
temp <- temp - vecMeans
} else {
temp[is.na(temp)] <- 0
}
if(scale){
temp <- temp/vecSd
}
res$loadings <- res$loadings + matrix(temp) %*% eigRes$vectors[k, 1:nf, drop=FALSE]
}
res$loadings <- res$loadings / nInd(x) # don't forget the 1/n in X^T D V
res$loadings <- sweep(res$loadings, 2, sqrt(eigRes$values[1:nf]), FUN="/")
}
## FORMAT OUTPUT ##
colnames(res$scores) <- paste("PC", 1:nf, sep="")
if(!is.null(indNames(x))){
rownames(res$scores) <- indNames(x)
} else {
rownames(res$scores) <- 1:nInd(x)
}
if(!is.null(res$loadings)){
colnames(res$loadings) <- paste("Axis", 1:nf, sep="")
if(!is.null(locNames(x)) & !is.null(alleles(x))){
rownames(res$loadings) <- paste(locNames(x),alleles(x), sep=".")
} else {
rownames(res$loadings) <- 1:nLoc(x)
}
}
if(returnDotProd){
res$dotProd <- allProd
rownames(res$dotProd) <- colnames(res$dotProd) <- indNames(x)
}
res$call <- match.call()
class(res) <- "glPca"
return(res)
} # glPca
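## Usage sketch (simulated data, as in the PCA tests at the end of this file):
## M <- matrix(sample(c(0,1), 50*1000, replace=TRUE), nrow=50)
## rownames(M) <- paste("ind", 1:50)
## x <- new("genlight", M)
## pca1 <- glPca(x, nf=3)       # centred PCA using the C routine, 3 axes kept
## pca1                         # printed via print.glPca
## scatter(pca1)                # individuals on the first two axes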
###############
## print.glPca
###############
#' @method print glPca
#' @export
print.glPca <- function(x, ...){
cat(" === PCA of genlight object ===")
cat("\nClass: list of type glPca")
cat("\nCall ($call):")
print(x$call)
cat("\nEigenvalues ($eig):\n", round(head(x$eig,6),3), ifelse(length(x$eig)>6, "...\n", "\n") )
cat("\nPrincipal components ($scores):\n matrix with", nrow(x$scores), "rows (individuals) and", ncol(x$scores), "columns (axes)", "\n")
if(!is.null(x$loadings)){
cat("\nPrincipal axes ($loadings):\n matrix with", nrow(x$loadings), "rows (SNPs) and", ncol(x$loadings), "columns (axes)", "\n")
}
if(!is.null(x$dotProd)){
cat("\nDot products between individuals ($dotProd):\n matrix with", nrow(x$dotProd), "rows and", ncol(x$dotProd), "columns", "\n")
}
cat("\n")
}
#################
## scatter.glPca
#################
#' @method scatter glPca
#' @export
scatter.glPca <- function(x, xax=1, yax=2, posi="bottomleft", bg="white", ratio=0.3,
label = rownames(x$scores), clabel = 1, xlim = NULL, ylim = NULL,
grid = TRUE, addaxes = TRUE, origin = c(0,0), include.origin = TRUE,
sub = "", csub = 1, possub = "bottomleft", cgrid = 1,
pixmap = NULL, contour = NULL, area = NULL, ...){
## set par
opar <- par(mar = par("mar"))
par(mar = c(0.1, 0.1, 0.1, 0.1), bg=bg)
on.exit(par(opar))
axes <- c(xax,yax)
## basic empty plot
## s.label(x$ind.coord[,axes], clab=0, cpoint=0, grid=FALSE, addaxes = FALSE, cgrid = 1, include.origin = FALSE, ...)
s.label(x$scores[,axes], label = label, clabel = clabel, xlim = xlim, ylim = ylim, grid = grid, addaxes = addaxes,
origin = origin, include.origin = include.origin, sub = sub, csub = csub, possub = possub, cgrid = cgrid,
pixmap = pixmap, contour = contour, area = area)
if(ratio>0.001) {
add.scatter.eig(x$eig, ncol(x$scores), axes[1], axes[2], posi=posi, ratio=ratio, csub=csub)
}
return(invisible(match.call()))
} # end scatter.glPca
#####################
## loadingplot.glPca
#####################
#' @method loadingplot glPca
#' @export
loadingplot.glPca <- function(x, at=NULL, threshold=NULL, axis=1, fac=NULL, byfac=FALSE,
lab=rownames(x$loadings), cex.lab=0.7, cex.fac=1, lab.jitter=0,
main="Loading plot", xlab="SNP positions", ylab="Contributions", srt=90, adj=c(0,0.5), ... ){
if(is.null(x$loadings)){
warning("This object does not contain loadings. Re-run the analysis, specifying 'loadings=TRUE'.")
return(invisible())
}
if(is.null(at)){
at <- as.integer(gsub("[.]+.+$", "", rownames(x$loadings)))
}
if(is.null(threshold)){
threshold <- quantile(x$loadings[,axis]^2,0.75)
}
res <- loadingplot.default(x$loadings^2, at=at, threshold=threshold, axis=axis, fac=fac, byfac=byfac,
lab=lab, cex.lab=cex.lab, cex.fac=cex.fac, lab.jitter=lab.jitter,
main=main, xlab=xlab, ylab=ylab, srt=srt, adj=adj, ...)
axis(1)
return(invisible(res))
} # end loadingplot.glPca
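## Example sketch ('pca1' is a hypothetical glPca result computed with loadings=TRUE):
## loadingplot(pca1, axis=1)                                          # squared loadings of PC1
## loadingplot(pca1, threshold=quantile(pca1$loadings[,1]^2, 0.95))   # stricter threshold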
###############
## .glPca2dudi
###############
.glPca2dudi <- function(x){
if(!inherits(x,"glPca")) stop("x is not a glPca object")
old.names <- names(x)
new.names <- sub("scores","li", old.names)
new.names <- sub("loadings","c1", new.names)
names(x) <- new.names
class(x) <- c("pca","dudi")
return(x)
} # end glPca2dudi
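## Example sketch: .glPca2dudi relabels a glPca result ($scores -> $li,
## $loadings -> $c1) so that code expecting a dudi-like list can use it
## ('pca1' is a hypothetical glPca result):
## d <- .glPca2dudi(pca1)
## class(d)      # "pca" "dudi"
## head(d$li)    # former $scores
## head(d$c1)    # former $loadings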
## TESTING ##
## x <- new("genlight", list(c(0,0,1,1,0), c(1,1,1,0,0,1), c(2,1,1,1,1,NA)))
## as.matrix(x)
## glNA(x)
## glSum(x)
## glMean(x)
## same ploidy everywhere
## x <- new("genlight", list(c(0,0,1,1,0), c(1,1,1,0,0,1), c(0,0,0,1,1,1)))
## f1 <- function(e) {return(mean((e-mean(e, na.rm=TRUE))^2, na.rm=TRUE))}
## all.equal(glVar(x), apply(as.matrix(x), 2, f1 )) # MUST BE TRUE
## all.equal(glVar(x,FALSE), apply(as.matrix(x), 2, f1 )) # MUST BE TRUE
## ## differences in ploidy
## x <- new("genlight", list(c(0,0,1,1,0), c(1,1,1,0,0,1), c(2,1,1,1,1,NA)))
## temp <- sweep(as.matrix(x), 1, c(1,1,2), "/")
## f2 <- function(e,w) {
## mu <- weighted.mean(e, w, na.rm=TRUE)
## res <- weighted.mean((e-mu)^2, w, na.rm=TRUE)
## return(res)
## }
## all.equal(glMean(x), apply(temp,2,weighted.mean, w=c(1,1,2), na.rm=TRUE)) # MUST BE TRUE
## all.equal(glVar(x), apply(temp, 2, f2,w=c(1,1,2) )) # MUST BE TRUE
## all.equal(glMean(x,FALSE), apply(temp,2,mean,na.rm=TRUE)) # MUST BE TRUE
## all.equal(glVar(x,FALSE), apply(temp,2,f1)) # MUST BE TRUE
#### TESTING DOT PRODUCTS ####
## M <- matrix(sample(c(0,1), 100*1e3, replace=TRUE), nrow=100)
## rownames(M) <- paste("ind", 1:100)
## x <- new("genlight",M)
## ## not centred, not scaled
## res1 <- glDotProd(x,alleleAsUnit=FALSE, center=FALSE, scale=FALSE)
## res2 <- M %*% t(M)
## all.equal(res1,res2) # must be TRUE
## ## centred, not scaled
## res1 <- glDotProd(x,alleleAsUnit=FALSE, center=TRUE, scale=FALSE)
## M <- scalewt(M,center=TRUE,scale=FALSE)
## res2 <- M %*% t(M)
## all.equal(res1,res2) # must be TRUE
## ## centred, scaled
## res1 <- glDotProd(x,alleleAsUnit=FALSE, center=TRUE, scale=TRUE)
## M <- scalewt(M,center=TRUE,scale=TRUE)
## res2 <- M %*% t(M)
## all.equal(res1,res2) # must be TRUE
#### TESTING PCA ####
## M <- matrix(sample(c(0,1), 20*1000, replace=TRUE), nrow=20)
## rownames(M) <- paste("ind", 1:20)
## x <- new("genlight",M)
## res1 <- glPca(x, nf=4)
## res2 <- glPca(x, useC=FALSE, nf=4)
## res3 <- dudi.pca(M, center=TRUE,scale=FALSE, scannf=FALSE,nf=4)
## ## all must be TRUE
## all.equal(res1$eig,res3$eig)
## all.equal(res2$eig,res3$eig)
## all.equal(res1$eig,res2$eig)
## all(abs(res1$scores)-abs(res3$li)<1e-8)
## all(abs(res2$scores)-abs(res3$li)<1e-8)
## all(abs(res1$scores)-abs(res2$scores)<1e-8)
## all(abs(res1$loadings)-abs(res3$c1)<1e-8)
## all(abs(res2$loadings)-abs(res3$c1)<1e-8)
## all(abs(res1$loadings)-abs(res2$loadings)<1e-8)
## ## perform ordinary PCA
## titi <- dudi.pca(M, center=TRUE, scale=FALSE, scannf=FALSE, nf=4)
## ## check results
## all(round(abs(toto$scores), 10) == round(abs(titi$li), 10)) # MUST BE TRUE
## all.equal(toto$eig, titi$eig) # MUST BE TRUE
## all(round(abs(toto$loadings), 10)==round(abs(titi$c1), 10)) # MUST BE TRUE
## TEST WITH NAS ##
## M <- matrix(sample(c(0,1, NA), 1e5, replace=TRUE, prob=c(.495,.495,.01)), nrow=100)
## rownames(M) <- paste("ind", 1:100)
## ## perform glPca
## x <- new("genlight",M)
## toto <- glPca(x, nf=4)
## round(cor(toto$scores),10) # must be diag(1,4)
## round(t(toto$loadings) %*% toto$loadings,10) # must be diag(1,4)
## ## SPEED TESTS ##
## ## perform glPca
## M <- matrix(sample(c(0,1), 100*1e5, replace=TRUE), nrow=100)
## x <- new("genlight",M)
## system.time(titi <- dudi.pca(M,center=TRUE,scale=FALSE, scannf=FALSE, nf=4)) # 92 sec
## system.time(toto <- glPca(x, center=TRUE, scale=FALSE, useC=TRUE, nf=4)) # 102 sec
## M <- matrix(sample(c(0,1), 200*1e5, replace=TRUE), nrow=200)
## x <- new("genlight",M)
## system.time(titi <- dudi.pca(M,center=TRUE,scale=FALSE, scannf=FALSE, nf=4)) # 109 sec
## system.time(toto <- glPca(x, center=TRUE, scale=FALSE, useC=TRUE, nf=4)) # 360 sec
## M <- matrix(sample(c(0,1), 100*5e5, replace=TRUE), nrow=500)
## x <- new("genlight",M)
## system.time(titi <- dudi.pca(M,center=TRUE,scale=FALSE, scannf=FALSE, nf=4)) # MEM LIMIT ISSUE
## system.time(toto <- glPca(x, center=TRUE, scale=FALSE, useC=TRUE, nf=4)) # sec
## USE R PROFILING ##
## for glPca
## M <- matrix(sample(c(0,1), 100*1e5, replace=TRUE), nrow=100)
## x <- new("genlight",M)
## Rprof("glPca-prof.log")
## toto <- glPca(x, center=TRUE, scale=FALSE, useC=TRUE, nf=4) # 102 sec
## Rprof(NULL)
## res <- summaryRprof("glPca-prof.log")
## t <- res$by.total$total.time
## names(t) <- rownames(res$by.total)
## par(mar=c(7,4,4,2))
## barplot(t,las=3, cex.names=.7)
## ## for dudi.pca
## M <- matrix(sample(c(0,1), 100*1e5, replace=TRUE), nrow=100)
## Rprof("dudipca-prof.log")
## toto <- dudi.pca(M ,center=TRUE,scale=FALSE, scannf=FALSE, nf=4) # 102 sec
## Rprof(NULL)
## res <- summaryRprof("dudipca-prof.log")
## t <- res$by.total$total.time
## names(t) <- rownames(res$by.total)
## par(mar=c(7,4,4,2))
## barplot(t,las=3, cex.names=.7)
## test GLsum:
## library(adegenet)
## x <- new("genlight", lapply(1:50, function(i) sample(c(0,1,NA), 1e5, prob=c(.5, .49, .01), replace=TRUE)))
## res1 <- glSum(x, useC=FALSE)
## res2 <- glSum(x, useC=TRUE)
## res3 <- apply(as.matrix(x),2,sum,na.rm=TRUE)
## all(res1==res3) # must be TRUE
## all(res2==res3) # must be TRUE
## library(adegenet)
## x <- new("genlight", lapply(1:50, function(i) sample(c(0,1,2,NA), 1e5, prob=c(.5, .40, .09, .01), replace=TRUE)))
## res1 <- glSum(x, alleleAsUnit=FALSE, useC=FALSE)
## res2 <- glSum(x, alleleAsUnit=FALSE, useC=TRUE)
## res3 <- apply(as.matrix(x)/ploidy(x),2,sum,na.rm=TRUE)
## all.equal(res1,res3)
## all.equal(res2,res3)
## TEST PARALLEL C COMPUTATIONS IN GLDOTPROD
## system.time(toto <- glDotProd(x, parallel=TRUE)) # 58 sec: cool!
## system.time(titi <- glDotProd(x, parallel=FALSE)) # 245 sec
## all.equal(toto,titi)
## TEST PARALLEL C COMPUTATIONS IN GLPCA ##
## first dataset
## x <- new("genlight", lapply(1:50, function(i) sample(c(0,1,2,NA), 1e5, prob=c(.5, .40, .09, .01), replace=TRUE)))
## system.time(pca1 <- glPca(x, parallel=FALSE, useC=FALSE, nf=1)) # no C, no parallel: 43 sec
## system.time(pca2 <- glPca(x, parallel=FALSE, useC=TRUE, nf=1)) # just C: 248 sec
## system.time(pca3 <- glPca(x, parallel=TRUE, useC=FALSE, nf=1, n.cores=7)) # just parallel: 16 sec
## system.time(pca4 <- glPca(x, parallel=TRUE, useC=TRUE, nf=1, n.cores=7)) # C and parallel: 65 sec
## all.equal(pca1$scores^2, pca2$scores^2) # must be TRUE
## all.equal(pca1$scores^2, pca3$scores^2) # must be TRUE
## all.equal(pca1$scores^2, pca4$scores^2) # must be TRUE
## second dataset
## x <- new("genlight", lapply(1:500, function(i) sample(c(0,1,2,NA), 1e4, prob=c(.5, .40, .09, .01), replace=TRUE)))
## system.time(pca1 <- glPca(x, parallel=FALSE, useC=FALSE, nf=1)) # no C, no parallel: 418 sec
## system.time(pca2 <- glPca(x, parallel=FALSE, useC=TRUE, nf=1)) # just C: 496 sec
## system.time(pca3 <- glPca(x, parallel=TRUE, useC=FALSE, nf=1, n.cores=7)) # just parallel: 589 sec
## system.time(pca4 <- glPca(x, parallel=TRUE, useC=TRUE, nf=1, n.cores=7)) # C and parallel: 315 sec
## all.equal(pca1$scores^2, pca2$scores^2) # must be TRUE
## all.equal(pca1$scores^2, pca3$scores^2) # must be TRUE
## all.equal(pca1$scores^2, pca4$scores^2) # must be TRUE