Nothing
###############
##
## CLASSES
##
###############
###############
## SNPbin class
###############
setClass("SNPbin", representation(snp = "list",
n.loc = "integer",
NA.posi = "integer",
label = "charOrNULL",
ploidy = "integer"),
prototype(snp = list(), n.loc = 0L, label = NULL, ploidy = 1L))
###############
## genlight class
###############
setClass("genlight", representation(gen = "list",
n.loc = "integer",
ind.names = "charOrNULL",
loc.names = "charOrNULL",
loc.all = "charOrNULL",
chromosome = "factorOrNULL",
position = "intOrNULL",
ploidy = "intOrNULL",
pop = "factorOrNULL",
strata = "dfOrNULL",
hierarchy = "formOrNULL",
other = "list"),
prototype(gen = list(), n.loc = 0L, ind.names = NULL, loc.names = NULL, loc.all = NULL,
chromosome = NULL, position = NULL, strata = NULL, hierarchy = NULL, ploidy=NULL, pop=NULL, other=list()))
#####################
##
## CONSTRUCTORS
##
#####################
####################
## SNPbin constructor
####################
setMethod("initialize", "SNPbin", function(.Object, ...) {
x <- .Object
input <- list(...)
if(length(input)==1) names(input) <- "snp"
if(length(input)>1 && ! "snp" %in% names(input)) names(input)[1] <- "snp"
## handle snp data ##
if(!is.null(input$snp) && length(input$snp)>0 && !all(is.na(input$snp))){
## a vector of raw is provided
if(is.raw(input$snp)){
x@snp <-list(input$snp)
}
## a list of raw vectors is provided
if(is.list(input$snp)){
if(all(sapply(input$snp, class)=="raw")){
x@snp <- input$snp
}
}
## a numeric/integer vector is provided
## conversion from a vector of 0/1 (integers)
if(is.numeric(input$snp) | is.integer(input$snp)){
input$snp <- as.integer(input$snp)
## determine ploidy
if(is.null(input$ploidy)){
input$ploidy <- max(input$snp, na.rm=TRUE)
if(input$ploidy==0) input$ploidy <- 1
}
input$ploidy <- as.integer(input$ploidy)
if(input$ploidy<1) stop("Ploidy is less than 1")
## check values in the vector
if(any(input$snp<0 | input$snp>input$ploidy, na.rm=TRUE)){
stop("Values of SNPs < 0 or > ploidy")
}
## handle ploidy (may have to split info into binary vectors)
x@snp <- list()
i <- max(input$snp, na.rm=TRUE) # determine max nb of alleles
if(i > 1){ # haplotype can be 0/1/2/...
j <- 0 # index for the length of the list @snp
while(i > 0){
j <- j+1 # update length of the result
temp <- as.integer(input$snp==i)
x@snp[[j]] <- .bin2raw(temp)$snp # make a vector of 1
input$snp <- input$snp-temp # deflate data (subtract the recoded alleles)
i <- max(input$snp, na.rm=TRUE) # update the max nb of alleles
}
} else { # haplotype is only 0/1/NA
x@snp[[1]] <- .bin2raw(input$snp)$snp
}
x@n.loc <- length(input$snp)
x@NA.posi <- which(is.na(input$snp))
x@ploidy <- as.integer(input$ploidy)
return(x)
}
}
## handle full-NA data
if(!is.null(input$snp) && all(is.na(input$snp))){
x@snp <- list()
x@n.loc <- length(input$snp)
x@snp[[1]] <- .bin2raw(rep(0L, length(input$snp)))$snp
x@NA.posi <- 1:length(input$snp)
if(!is.null(input$ploidy)){
x@ploidy <- as.integer(input$ploidy)
} else {
x@ploidy <- as.integer(NA)
}
return(x)
}
## handle n.loc ##
if(!is.null(input$n.loc)){
x@n.loc <- as.integer(input$n.loc)
} else {
if(!is.null(input$snp)){
warning("number of SNPs (n.loc) not provided to the genlight constructor - using the maximum number given data coding.")
x@n.loc <- as.integer(length(x@snp)*8)
} else {
x@n.loc <- 0L
}
}
## handle NA.posi ##
if(!is.null(input$NA.posi)){
x@NA.posi <- as.integer(input$NA.posi)
}
## handle ploidy ##
if(!is.null(input$ploidy)){
x@ploidy <- as.integer(input$ploidy)
}
return(x)
}) # end SNPbin constructor
########################
## genlight constructor
########################
setMethod("initialize", "genlight", function(.Object, ..., parallel=FALSE, n.cores=NULL) {
if(parallel && !require(parallel)) stop("parallel package requested but not installed")
if(parallel && is.null(n.cores)){
n.cores <- parallel::detectCores()
}
if( .Platform$OS.type == "windows" ){
n.cores <- 1
}
x <- .Object
input <- list(...)
if(length(input)==1 && is.null(names(input))) names(input) <- "gen"
if(length(input)>1 && ! "gen" %in% names(input)) names(input)[1] <- "gen"
## HANDLE INPUT$GEN ##
if(!is.null(input$gen)){
## input$gen is a list of SNPbin ##
if(is.list(input$gen) && all(sapply(input$gen, class)=="SNPbin")){
## check nb of loci in each SNPbin
if(length(unique(sapply(input$gen, nLoc)))>1) {
warning("SNPbin objects have different numbers of loci")
input$gen <- lapply(input$gen, as.integer)
} else { # all seems fine
x@gen <- input$gen
if(is.null(input$ind.names)){
input$ind.names <- names(input$gen)
}
}
}
## input$gen is a matrix or a data.frame
if((is.matrix(input$gen) & !inherits(input$gen,"snp.matrix")) | is.data.frame(input$gen)){
if(is.null(input$ind.names)){
input$ind.names <- rownames(input$gen)
}
if(is.null(input$loc.names)){
input$loc.names <- colnames(input$gen)
if(is.data.frame(input$gen)){ # do not use names if these are the default names of a data.frame
if(identical(colnames(input$gen), paste("V", 1:ncol(input$gen), sep=""))){
input$loc.names <- NULL
}
}
}
##input$gen <- lapply(1:nrow(input$gen), function(i) as.integer(input$gen[i,]))
if(parallel){
x@gen <- parallel::mclapply(1:nrow(input$gen), function(i) new("SNPbin", as.integer(input$gen[i,])),
mc.cores=n.cores, mc.silent=TRUE, mc.cleanup=TRUE, mc.preschedule=FALSE)
} else {
x@gen <- lapply(1:nrow(input$gen), function(i) new("SNPbin", as.integer(input$gen[i,])) )
}
}
## input$gen is a list of integers/numeric ##
if(is.list(input$gen) && !is.data.frame(input$gen) && all(sapply(input$gen, class) %in% c("integer","numeric"))){
## check length consistency
lengthvec <- sapply(input$gen, length)
## complete with NA is necessary
if(length(unique(lengthvec))>1) {
warning("Genotypes have variable length; completing shorter ones with NAs.")
for(i in 1:length(input$gen)){
input$gen[[i]] <- c(input$gen[[i]], rep(NA, max(lengthvec)-length(input$gen[[i]])))
}
}
## name individuals if needed
if(is.null(input$ind.names)){
input$ind.names <- names(input$gen)
}
## create SNPbin list
if(parallel){
x@gen <- parallel::mclapply(input$gen, function(e) new("SNPbin",e), mc.cores=n.cores, mc.silent=TRUE, mc.cleanup=TRUE, mc.preschedule=FALSE)
} else {
x@gen <- lapply(input$gen, function(e) new("SNPbin",e))
}
}
## input$gen is a snp.matrix object ##
if(inherits(input$gen,"snp.matrix")){
if(!require(snpMatrix)){
cat("\nThe package snp.matrix is needed for this conversion.")
cat("\nTo install it, type:")
cat("\n source(\"http://bioconductor.org/biocLite.R\")")
cat("\n biocLite(\"snpMatrix\")\n")
x@gen <- NULL
} else {
## function to convert one indiv
f1 <- function(x){
res <- as.integer(x)
res[res==0] <- NA
res <- res-1
return(new("SNPbin", as.integer(res), ploidy=2L))
}
## create SNPbin list
if(parallel){
x@gen <- parallel::mclapply(1:nrow(input$gen), function(i) f1(input$gen[i,]), mc.cores=n.cores, mc.silent=TRUE, mc.cleanup=TRUE, mc.preschedule=FALSE)
} else {
x@gen <- lapply(1:nrow(input$gen), function(i) f1(input$gen[i,]))
}
## handle names
if(is.null(input$ind.names)) {input$ind.names <- rownames(input$gen)}
if(is.null(input$loc.names)) {input$loc.names <- colnames(input$gen)}
}
}
}
if(length(x@gen) > 0) { # if non-emtpy object
## HANDLE INPUT$IND.NAMES ##
if(!is.null(input$ind.names)){
input$ind.names <- as.character(input$ind.names)
## check length consistency
if(length(input$ind.names) != nInd(x)){
warning("Inconsistent length for ind.names - storing this argument in @other.")
if(is.null(input$other)) {
input$other <- list(ind.names.wrong.length=input$ind.names)
} else {
input$other$ind.names.wrong.length <- input$ind.names
}
} else {
## assign value to the output object
x@ind.names <- input$ind.names
## ## name list and each SNPbin ## THIS DUPLICATES THE INFORMATION
## names(x@gen) <- input$ind.names
## for(i in 1:length(x@gen)){
## x@gen[[i]]@label <- input$ind.names[i]
## }
}
}
## HANDLE INPUT$N.LOC ##
if(!is.null(input$n.loc)){ # n.loc is provided
input$n.loc <- as.integer(input$n.loc)
## check length consistency
if(input$n.loc != nLoc(x@gen[[1]])) {
warning("Inconsistent number of loci (n.loc) - ignoring this argument.")
} else {
x@n.loc <- input$n.loc
}
} else { # n.loc is not provided
x@n.loc <- nLoc(x@gen[[1]])
}
## HANDLE INPUT$PLOIDY ##
## note: if not provided, @ploidy is NULL (saves some space)
if(!is.null(input$ploidy)){ # ploidy is provided
input$ploidy <- as.integer(input$ploidy)
input$ploidy <- rep(input$ploidy, length=length(x@gen))
x@ploidy <- input$ploidy
}
## HANDLE INPUT$LOC.NAMES ##
if(!is.null(input$loc.names) && length(input$loc.names)>0){ # ploidy is provided
input$loc.names <- as.character(input$loc.names)
## check length consistency
if(length(input$loc.names) != x@n.loc){ # if problem, store in @other
warning("Inconsistent length for loc.names - storing this argument in @other.")
if(is.null(input$other)) {
input$other <- list(loc.names.wrong.length=input$loc.names)
} else {
input$other$loc.names.wrong.length <- input$loc.names
}
} else {
x@loc.names <- input$loc.names
}
}
## HANDLE INPUT$LOC.ALL ##
if(!is.null(input$loc.all) && length(input$loc.all)>0){ # ploidy is provided
input$loc.all <- as.character(input$loc.all)
## check length consistency
if(length(input$loc.all) != x@n.loc){
warning("Inconsistent length for loc.all - storing this argument in @other.")
if(is.null(input$other)) {
input$other <- list(loc.all.wrong.length=input$loc.all)
} else {
input$other$loc.all.wrong.length <- input$loc.all
}
} else {
## check string consistency (format is e.g. "a/t")
temp <- grep("^[[:alpha:]]{1}/[[:alpha:]]{1}$", input$loc.all)
if(any(! 1:nLoc(x@gen[[1]]) %in% temp)){ # if problem, store in @other
## input$loc.all <- gsub("[[:space:]]","", input$loc.all)
warning("Miss-formed strings in loc.all (must be e.g. 'c/g') - storing this argument in @other.")
if(is.null(input$other)) {
input$other <- list(loc.all.misformed=input$loc.all)
} else {
input$other$loc.all.misformed <- input$loc.all
}
} else {
x@loc.all <- input$loc.all
}
}
}
## HANDLE CHROMOSOME ##
if(!is.null(input$chromosome)){
if(length(input$chromosome) != x@n.loc) { # if wrong length, store in @other
warning("chromosome argument has inconsistent length - storing this argument in @other")
if(is.null(input$other)) {
input$other <- list(chromosome.wrong.length=input$chromosome)
} else {
input$other$chromosome.wrong.length <- input$chromosome
}
} else {
x@chromosome <- factor(input$chromosome)
}
}
## HANDLE POSITION ##
if(!is.null(input$position)){
if(length(input$position) != x@n.loc) { # if wrong length, store in @other
warning("position argument has inconsistent length - storing this argument in @other")
if(is.null(input$other)) {
input$other <- list(position.wrong.length=input$position)
} else {
input$other$position.wrong.length <- input$position
}
} else {
x@position <- as.integer(input$position)
}
}
## HANDLE INPUT$POP ##
if(!is.null(input$pop)){
## check length consistency
if(length(input$pop) != nInd(x)){
warning("Inconsistent length for pop - ignoring this argument.")
if(is.null(input$other)) {
input$other <- list(pop.wrong.length=input$pop)
} else {
input$other$pop.wrong.length <- input$pop
}
} else {
x@pop <- factor(input$pop)
}
}
## HANDLE INPUT$STRATA ##
if(!is.null(input$strata)){
## check length consistency
if(nrow(input$strata) != nInd(x)){
warning("Inconsistent length for strata - ignoring this argument.")
if(is.null(input$other)) {
input$other <- list(strata.wrong.length=input$strata)
} else {
input$other$strata.wrong.length <- input$strata
}
} else {
# Make sure that the hierarchies are factors.
x@strata <- data.frame(lapply(input$strata, function(f) factor(f, unique(f))))
if(!is.null(x@ind.names)){
rownames(x@strata) <- x@ind.names
}
}
}
## HANDLE INPUT$HIERARCHY ##
if (!is.null(x@strata) && !is.null(input$hierarchy)){
if (is.language(input$hierarchy)){
the_names <- all.vars(input$hierarchy)
if (all(the_names %in% names(x@strata))){
## TODO: CHECK HIERARCHY HERE
x@hierarchy <- input$hierarchy
} else {
warning("hierarchy names do not match names of strata. Setting slot to NULL")
x@hierarchy <- NULL
}
} else {
warning("hierarchy must be a formula. Setting slot to NULL.")
x@hierarchy <- NULL
}
}
} # end if non-empty @gen
## HANDLE INPUT$OTHER ##
if(!is.null(input$other)){
x@other <- input$other
}
## RETURN OBJECT ##
names(x@gen) <- NULL # do not store ind.names twice
return(x)
}) # end genlight constructor
################################
##
## METHODS AND ACCESSORS
##
################################
###############
## show SNPbin
###############
setMethod ("show", "SNPbin", function(object){
cat("/// SNPBIN OBJECT /////////")
if(!is.null(object@label)) {
cat("\n", object@label)
}
cat("\n", format(nLoc(object), big.mark=","), "SNPs coded as bits, size:", format(object.size(object), units="auto"))
cat("\n Ploidy:", object@ploidy)
temp <- round(length(object@NA.posi)/nLoc(object) *100,2)
cat("\n ", length(object@NA.posi), " (", temp," %) missing data\n", sep="")
}) # end show method
#################
## show genlight
#################
setMethod ("show", "genlight", function(object){
## HEADER
cat(" /// GENLIGHT OBJECT /////////")
cat("\n\n //", format(nInd(object), big.mark=","), "genotypes, ",
format(nLoc(object), big.mark=","), "binary SNPs, size:", format(object.size(object), units="auto"))
temp <- sapply(object@gen, function(e) length(e@NA.posi))
if(length(temp>1)){
cat("\n ", sum(temp), " (", round((sum(temp)/(nInd(object)*nLoc(object))) *100,2)," %) missing data", sep="")
}
## BASIC CONTENT
cat("\n\n // Basic content")
cat("\n @gen: list of", length(object@gen), "SNPbin")
if(!is.null(object@ploidy)){
ploidytxt <- paste("(range: ", paste(range(object@ploidy), collapse="-"), ")", sep="")
cat("\n @ploidy: ploidy of each individual ", ploidytxt)
}
## OPTIONAL CONTENT
cat("\n\n // Optional content")
optional <- FALSE
if(!is.null(object@ind.names)){
optional <- TRUE
cat("\n @ind.names: ", length(object@ind.names), "individual labels")
}
if(!is.null(object@loc.names)){
optional <- TRUE
cat("\n @loc.names: ", length(object@loc.names), "locus labels")
}
if(!is.null(object@loc.all)){
optional <- TRUE
cat("\n @loc.all: ", length(object@loc.all), "alleles")
}
if(!is.null(object@chromosome)){
optional <- TRUE
cat("\n @chromosome: factor storing chromosomes of the SNPs")
}
if(!is.null(object@position)){
optional <- TRUE
cat("\n @position: integer storing positions of the SNPs")
}
if(!is.null(object@pop)){
optional <- TRUE
poptxt <- paste("(group size range: ", paste(range(table(object@pop)), collapse="-"), ")", sep="")
cat("\n @pop:", paste("population of each individual", poptxt))
}
if (!is.null(object@strata)){
optional <- TRUE
cat("\n @strata: ")
levs <- names(object@strata)
if (length(levs) > 6){
levs <- paste(paste(head(levs), collapse = ", "), "...", sep = ", ")
} else {
levs <- paste(levs, collapse = ", ")
}
cat("a data frame with", length(object@strata), "columns (", levs, ")")
}
if (!is.null(object@hierarchy)){
optional <- TRUE
cat("\n @hierarchy:", paste(object@hierarchy, collapse = ""))
}
if(!is.null(object@other)){
optional <- TRUE
cat("\n @other: ")
cat("a list containing: ")
cat(ifelse(is.null(names(object@other)), "elements without names", paste(names(object@other), collapse= " ")), "\n")
}
if(!optional) cat("\n - empty -")
cat("\n")
}) # end show method
############
## accessors
############
## nLoc
setMethod("nLoc","SNPbin", function(x,...){
return(x@n.loc)
})
setMethod("nLoc","genlight", function(x,...){
return(x@n.loc)
})
## nInd
setMethod("nInd","genlight", function(x,...){
return(length(x@gen))
})
## nPop
setMethod("nPop","genlight", function(x,...){
return(length(levels(pop(x))))
})
setMethod("dim", "genlight", function(x){
return(c(nInd(x), nLoc(x)))
})
## $
setMethod("$","SNPbin",function(x,name) {
return(slot(x,name))
})
setMethod("$","genlight",function(x,name) {
return(slot(x,name))
})
setMethod("$<-","SNPbin",function(x,name,value) {
slot(x,name,check=TRUE) <- value
return(x)
})
setMethod("$<-","genlight",function(x,name,value) {
slot(x,name,check=TRUE) <- value
return(x)
})
## names
setMethod("names", signature(x = "SNPbin"), function(x){
return(slotNames(x))
})
setMethod("names", signature(x = "genlight"), function(x){
return(slotNames(x))
})
## ploidy
setMethod("ploidy","SNPbin", function(x,...){
return(x@ploidy)
})
setMethod("ploidy","genlight", function(x,...){
if(nInd(x)>0){
if(!is.null(x@ploidy)){
res <- x@ploidy
} else {
res <- sapply(x@gen, function(e) e@ploidy)
}
names(res) <- x@ind.names
return(res)
} else {
return(NULL)
}
})
setReplaceMethod("ploidy","SNPbin",function(x,value) {
if(is.null(value)){
slot(x, "ploidy", check=TRUE) <- value
return(x)
}
value <- as.integer(value)
if(any(value)<1) stop("Negative or null values provided")
if(any(is.na(value))) stop("NA values provided")
if(length(value)>1) warning("Several ploidy numbers provided; using only the first integer")
slot(x,"ploidy",check=TRUE) <- value[1]
return(x)
})
setReplaceMethod("ploidy","genlight",function(x,value) {
if(is.null(value)){
slot(x, "ploidy", check=TRUE) <- value
return(x)
}
value <- as.integer(value)
if(any(value)<1) stop("Negative or null values provided")
if(any(is.na(value))) stop("NA values provided")
if(length(value) == 1) value <- rep(value, length=nInd(x))
if(length(value) != nInd(x)) stop("Length of the provided vector does not match nInd(x)")
slot(x,"ploidy",check=TRUE) <- value
return(x)
})
## locNames
setMethod("locNames","genlight", function(x,...){
## if loc names provided, return them
if(!is.null(x@loc.names)) return(x@loc.names)
## otherwise, look for position / alleles
if(!is.null(res <- position(x))){
if(!is.null(alleles(x))){
res <- paste(res, alleles(x), sep=".")
} else { # force position to be character
res <- as.character(res)
}
return(res)
}
})
setReplaceMethod("locNames","genlight",function(x,value) {
if(is.null(value)){
slot(x, "loc.names", check=TRUE) <- value
return(x)
}
if(length(value) != nLoc(x)) stop("Vector length does no match number of loci")
slot(x,"loc.names",check=TRUE) <- as.character(value)
return(x)
})
## indNames
setMethod("indNames","genlight", function(x,...){
if(length(x@ind.names)==0) return(NULL)
return(x@ind.names)
})
setReplaceMethod("indNames","genlight",function(x,value) {
if(is.null(value)){
slot(x, "ind.names", check=TRUE) <- value
return(x)
}
value <- as.character(value)
if(length(value) != nInd(x)) stop("Vector length does no match number of individuals")
slot(x,"ind.names",check=TRUE) <- value
return(x)
})
## popNames
setMethod("popNames","genlight", function(x,...){
if(length(levels(pop(x)))==0) return(NULL)
return(levels(pop(x)))
})
setReplaceMethod("popNames","genlight",function(x,value) {
if (is.null(value) || any(is.na(value))){
stop("Can't set population names to NULL or NA")
return(x)
}
value <- as.character(value)
if(length(value) != length(levels(pop(x)))){
stop("Vector length does no match number of populations")
}
levels(pop(x)) <- value
return(x)
})
## alleles
setMethod("alleles","genlight", function(x,...){
if(length(x@loc.all)==0) return(NULL)
return(x@loc.all)
})
setReplaceMethod("alleles","genlight", function(x, value){
if(is.null(value)){
slot(x, "loc.all", check=TRUE) <- value
return(x)
}
value <- as.character(value)
if(length(value)!=nLoc(x)) stop("replacement vector must be of length nLoc(x)")
temp <- grep("^[[:alpha:]]{1}/[[:alpha:]]{1}$", value)
if(any(! 1:nLoc(x) %in% temp)) stop("Miss-formed strings in replacement (must be e.g. 'c/g')")
slot(x, "loc.all", check=TRUE) <- value
return(x)
})
## chromosome
setGeneric("chromosome", function(x, ...) standardGeneric("chromosome"))
setGeneric("chromosome<-", function(x, value) standardGeneric("chromosome<-"))
setGeneric("chr", function(x,...) standardGeneric("chr"))
setGeneric("chr<-", function(x, value) standardGeneric("chr<-"))
setMethod("chromosome","genlight", function(x,...){
if(length(x@chromosome)==0) return(NULL)
return(x@chromosome)
})
setMethod("chr","genlight", function(x,...){
return(chromosome(x))
})
setReplaceMethod("chromosome","genlight",function(x,value) {
if(is.null(value)){
slot(x, "chromosome", check=TRUE) <- value
return(x)
}
if(length(value) != nLoc(x)) stop("Vector length does no match number of loci")
slot(x,"chromosome",check=TRUE) <- factor(value)
return(x)
})
setReplaceMethod("chr","genlight",function(x,value) {
chromosome(x) <- value
return(x)
})
## position
setGeneric("position", function(x, ...) standardGeneric("position"))
setGeneric("position<-", function(x, value) standardGeneric("position<-"))
setMethod("position","genlight", function(x,...){
if(length(x@position)==0) return(NULL)
return(x@position)
})
setReplaceMethod("position","genlight",function(x,value) {
if(is.null(value)){
slot(x, "position", check=TRUE) <- value
return(x)
}
if(length(value) != nLoc(x)) stop("Vector length does no match number of loci")
slot(x,"position",check=TRUE) <- as.integer(value)
return(x)
})
## NA.posi
setGeneric("NA.posi", function(x, ...) standardGeneric("NA.posi"))
setMethod("NA.posi","SNPbin", function(x,...){
return(x@NA.posi)
})
setMethod("NA.posi","genlight", function(x,...){
res <- lapply(x@gen, function(e) e@NA.posi)
names(res) <- indNames(x)
return(res)
})
## pop
setMethod("pop","genlight", function(x){
return(x@pop)
})
setReplaceMethod("pop","genlight",function(x,value) {
if(is.null(value) | length(value)==0){
slot(x, "pop", check=TRUE) <- NULL
return(x)
}
if(length(value) != nInd(x)) stop("Vector length does no match number of individuals")
slot(x,"pop", check=TRUE) <- factor(value)
return(x)
})
## other
setMethod("other","genlight", function(x,...){
if(length(x@other)==0) return(NULL)
return(x@other)
})
setReplaceMethod("other","genlight",function(x,value) {
if( !is.null(value) && (!is.list(value) | is.data.frame(value)) ) {
value <- list(value)
}
slot(x,"other",check=TRUE) <- value
return(x)
})
###################
##
## CONVERSIONS
##
###################
############
## .bin2raw
###########
## each byte takes a value on [0,255]
## function to code multiple SNPs on a byte
## 8 combinations of SNPs can be coded onto a single byte (0->255)
.bin2raw <- function(vecSnp){
## handle missing data
NAposi <- which(is.na(vecSnp))
if(length(NAposi)>0){
vecSnp[is.na(vecSnp)] <- 0L
}
nbBytes <- length(vecSnp) %/% 8
if(length(vecSnp) %% 8 > 0) {nbBytes <- nbBytes +1}
ori.length <- length(vecSnp)
new.length <- 8*nbBytes
vecSnp <- c(vecSnp, rep(0, new.length-ori.length)) # fill the end with 0 of necessary
## map info to bytes (0:255)
vecSnp <- as.integer(vecSnp)
##vecRaw <- integer(nbBytes) # no longer needed - sending raw type directly
vecRaw <- raw(nbBytes)
# Wed Apr 12 08:26:49 2017 ------------------------------
# I changed this from the C function to the base R function `packBits()`
# because I was getting an error on solaris sparc. It suffers an increase
# of a fraction of a millisecond in processing time, but I think that's
# managable.
#
# ZNK
#
# library(microbenchmark)
# set.seed(5000)
# dat <- sample(c(0,1,NA), 1e5, prob=c(.495, .495, .01), replace=TRUE)
# y <- microbenchmark(C = .bin2raw(dat), base = .bin2raw_original(dat), times = 1000)
# print(y, "relative")
## Unit: relative
## expr min lq mean median uq max neval cld
## C 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1000 a
## base 1.074409 1.113972 1.103931 1.103837 1.083847 1.183597 1000 a
vecRaw <- packBits(vecSnp)
# vecRaw <- .C("binIntToBytes", vecSnp, length(vecSnp), vecRaw, nbBytes, PACKAGE="adegenet")[[3]]
## vecraw <- sapply(seq(1, by=8, length=nbBytes), function(i) which(apply(SNPCOMB,1, function(e) all(temp[i:(i+7)]==e))) ) # old R version
## return result
res <- list(snp=vecRaw, n.loc=as.integer(ori.length), NA.posi=as.integer(NAposi))
return(res)
} # end .bin2raw
###########
## .raw2bin
###########
## convert vector of raw to 0/1 integers
.raw2bin <- function(x){
if(!is.raw(x)) stop("x is not of class raw")
## SNPCOMB <- as.matrix(expand.grid(rep(list(c(0,1)), 8)))
## colnames(SNPCOMB) <- NULL
## res <- unlist(lapply(as.integer(x), function(i) SNPCOMB[i+1,]))
res <- .C("bytesToBinInt", x, length(x), integer(length(x)*8), PACKAGE="adegenet")[[3]]
# Below is an equivalent function in base, but it's slowed down 8x
# res <- as.integer(rawToBits(x))
return(res)
} # end .raw2bin
#############
## .SNPbin2int
#############
## convert SNPbin to integers (0/1/2...)
.SNPbin2int <- function(x){
##res <- lapply(x@snp, .raw2bin)
resSize <- length(x@snp[[1]])*8
# Wed Apr 12 08:49:02 2017 ------------------------------
# I am leaving this function along as it does not necessarily break solaris,
# but I am leaving the code and timings just in case.
#
# ZNK
res <- .C("bytesToInt", unlist(x@snp), length(x@snp[[1]]), length(x@snp), integer(resSize), as.integer(resSize), PACKAGE="adegenet")[[4]][1:nLoc(x)]
# library(microbenchmark)
# set.seed(5000)
# dat <- sample(c(0:2,NA), 1e5, prob=c(rep(.995/5,3), 0.005), replace=TRUE)
# x <- new("SNPbin", dat)
# y <- microbenchmark(C = .SNPbin2int(x), base = .SNPbin2int1(x), times = 1000)
# print(y, "relative")
## Unit: relative
## expr min lq mean median uq max neval cld
## C 1.000000 1.000000 1.000000 1.000000 1.00000 1.0000000 1000 a
## base 2.206831 1.200831 1.019783 1.168149 1.02654 0.1668163 1000 a
# res <- vapply(x@snp, function(x) as.integer(rawToBits(x)), integer(resSize))
# res <- apply(res[1:nLoc(x), ], 1, sum)
if (length(x@NA.posi) > 0){
res[x@NA.posi] <- NA_integer_
}
return(res)
} # end .SNPbin2int
#############
## as methods
#############
## KLUDGE - needed for as.matrix.genlight to be dispatched correctly (R-2.12.1)
setGeneric("as.matrix")
## SNPbin/genlight -> other
setAs("SNPbin", "integer", def=function(from){
res <- .SNPbin2int(from)
return(res)
})
#' @export
as.integer.SNPbin <- function(x, ...){
return(as(x, "integer"))
}
setAs("genlight", "matrix", def=function(from){
res <- unlist(lapply(from@gen, as.integer))
res <- matrix(res, ncol=nLoc(from), nrow=nInd(from), byrow=TRUE)
colnames(res) <- locNames(from)
## if(!is.null(alleles(from))){
## colnames(res) <- paste(locNames(from), alleles(from), sep=ifelse(is.null(locNames(from)), "", "."))
## }
rownames(res) <- indNames(from)
return(res)
})
#' @method as.matrix genlight
#' @export
as.matrix.genlight <- function(x, ...){
return(as(x, "matrix"))
}
setAs("genlight", "data.frame", def=function(from){
return(as.data.frame(as.matrix(from)))
})
#' @export
as.data.frame.genlight <- function(x, ...){
return(as(x, "data.frame"))
}
setAs("genlight", "list", def=function(from){
res <- lapply(from@gen, as.integer)
names(res) <- indNames(from)
return(res)
})
#' @export
as.list.genlight <- function(x, ...){
return(as(x, "list"))
}
## other -> SNPbin/genlight
setGeneric("as.SNPbin", function(x, ...) standardGeneric("as.SNPbin"))
setGeneric("as.genlight", function(x, ...) standardGeneric("as.genlight"))
setAs("integer", "SNPbin", def = function(from){
res <- new("SNPbin", from)
return(res)
})
setAs("numeric", "SNPbin", def = function(from){
res <- new("SNPbin", from)
return(res)
})
setMethod("as.SNPbin", "integer", function(x, ...) as(x, "SNPbin"))
setMethod("as.SNPbin", "numeric", function(x, ...) as(x, "SNPbin"))
setAs("matrix", "genlight", def=function(from){
return(new("genlight", from))
})
setAs("data.frame", "genlight", def=function(from){
return(new("genlight", from))
})
setAs("list", "genlight", def=function(from){
return(new("genlight", from))
})
## setAs("snp.matrix", "genlight", def=function(from){
## return(new("genlight", from))
## })
setMethod("as.genlight", "matrix", function(x, ...) as(x, "genlight"))
setMethod("as.genlight", "data.frame", function(x, ...) as(x, "genlight"))
setMethod("as.genlight", "list", function(x, ...) as(x, "genlight"))
## setMethod("as.genlight", "snp.matrix", function(x, ...) as(x, "genlight"))
setMethod("tab", "genlight",
function(x, freq = FALSE,
NA.method = c("mean", "asis", "zero")) {
NA.method <- match.arg(NA.method)
out <- as.matrix(x)
if (freq) {
out <- out / ploidy(x)
}
if (NA.method == "mean"){
f1 <- function(vec){
m <- mean(vec, na.rm = TRUE)
vec[is.na(vec)] <- m
return(vec)
}
out <- apply(out, 2, f1)
}
if (NA.method == "zero"){
out[is.na(out)] <- ifelse(freq, 0, 0L)
}
return(out)
})
################################
## testing SNPbin
##
##
## library(adegenet)
## dat <- c(1,0,0,1,0,NA,1,0,0,0,0,1)
## x <- new("SNPbin",dat)
## as.integer(x)
## HAPLOID DATA - NO NA
## dat <- sample(c(0L,1L), 1e6, replace=TRUE)
## x <- new("SNPbin", dat)
## identical(as(x, "integer"),dat) # SHOULD NORMALLY BE TRUE
## all(as(x, "integer") == dat, na.rm=TRUE) # MUST BE TRUE
## object.size(dat)/object.size(x) # EFFICIENCY OF CONVERSION
## HAPLOID DATA - WITH NAs
## dat <- sample(c(0,1,NA), 1e6, prob=c(.5, .49, .01), replace=TRUE)
## x <- new("SNPbin", dat)
## identical(as(x, "integer"),dat) # SHOULD NORMALLY BE TRUE
## all(as(x, "integer") == dat, na.rm=TRUE) # MUST BE TRUE
## object.size(dat)/object.size(x) # EFFICIENCY OF CONVERSION
## DIPLOID DATA
## dat <- sample(c(0:2,NA), 1e6, prob=c(.4, .4, .195 ,.005), replace=TRUE)
## x <- new("SNPbin", dat)
## identical(as(x, "integer"),dat) # MUST BE TRUE
## object.size(dat)/object.size(x) # EFFICIENCY OF CONVERSION
## POLYLOID DATA
## dat <- sample(c(0:5,NA), 1e6, prob=c(rep(.995/6,6), 0.005), replace=TRUE)
## x <- new("SNPbin", dat)
## identical(as(x, "integer"),dat) # MUST BE TRUE
## object.size(dat)/object.size(x) # EFFICIENCY OF CONVERSION
################################
## testing genlight
##
##
## SIMPLE TESTS
## dat <- c(1,0,0,1,0,0,1,1,1,0,1)
## x <- new("SNPbin",dat)$snp[[1]]
## as.integer(x)==dat
## library(adegenet)
## dat <- list(toto=c(1,1,0,0), titi=c(NA,1,1,0), tata=c(NA,0,3, NA))
## x <- new("genlight", dat)
## x
## as.list(x)
## as.matrix(x)
## identical(x, new("genlight", as.list(x))) # round trip - list - MUST BE TRUE
## identical(x, new("genlight", as.matrix(x))) # round trip - matrix - MUST BE TRUE
## identical(x, new("genlight", as.data.frame(x))) # round trip - data.frame - MUST BE TRUE
## ## test subsetting
## identical(as.list(x[c(1,3)]), as.list(x)[c(1,3)]) # MUST BE TRUE
## identical(x, x[]) # MUST BE TRUE
## all.equal(t(as.matrix(as.data.frame(dat)))[,1:3], as.matrix(x[,1:3])) # MUST BE TRUE
## ## ## BIG SCALE TEST - HAPLOID DATA WITH NA
## library(adegenet)
## dat <- lapply(1:50, function(i) sample(c(0,1,NA), 1e6, prob=c(.5, .49, .01), replace=TRUE))
## names(dat) <- paste("indiv", 1:length(dat))
## print(object.size(dat), unit="aut")
## system.time(x <- new("genlight", dat)) # conversion + time taken
## print(object.size(x), unit="au")
## object.size(dat)/object.size(x) # conversion efficiency
## ## time taken by subsetting (quite long, +- 35sec)
## system.time(y <- x[1:10, 1:5e5])
## c, cbind, rbind ##
## a <- new("genlight", list(c(1,0,1), c(0,0,1,0)) )
## b <- new("genlight", list(c(1,0,1,1,1,1), c(1,0)) )
## locNames(a) <- letters[1:4]
## locNames(b) <- 1:6
## c <- cbind(a,b)
## identical(as.matrix(c),cbind(as.matrix(a), as.matrix(b))) # MUST BE TRUE
## identical(as.matrix(rbind(a,a)),rbind(as.matrix(a),as.matrix(a)))
## test subsetting with/without @other ##
## x <- new("genlight", list(a=1,b=0,c=1), other=list(1:3, letters, data.frame(2:4)))
## pop(x) <- c("pop1","pop1", "pop2")
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.