R/countdata2genobaypass.R

Defines functions countdata2genobaypass

Documented in countdata2genobaypass

#' Convert a countdata object into BayPass input files.
#' @description Convert a countdata object into a BayPass allele count file. A file containing SNP details is also written. Options to generate sub-samples (e.g., for large numbers of SNPs) are also available.
#' @param countdata A countdata object
#' @param writing.dir Directory where to create the files (e.g., set writing.dir=getwd() to write in the current working directory)
#' @param prefix Prefix used for output file names. If empty (default), the files are named "genobaypass" and "snpdet"; otherwise they are named "prefix.genobaypass" and "prefix.snpdet"
#' @param subsamplesize Size of the sub-samples. If <=1 (default), all the SNPs are considered in the output. When sub-sampling is activated, it must not exceed the number of SNPs of the countdata object (an error is raised otherwise)
#' @param subsamplingmethod If sub-sampling is activated (argument subsamplesize), define the method used for subsampling that might be either i) "random" (a single data set consisting of randomly chosen SNPs is generated and a suffix ".sub" is added to the file names) or ii) "thinning", sub-samples are generated by taking SNPs one every nsub=floor(nsnp/subsamplesize) in the order of the map (a suffix ".subn" is added to each sub-sample files where n varies from 1 to nsub).
#' @return Files containing allele count (in BayPass format) and SNP details (as in the snp.info matrix from the countdata object). The function itself returns NULL invisibly.
#' @seealso To generate countdata object, see \code{\link{genotreemix2countdata}}, \code{\link{genobaypass2countdata}}
#' @examples
#'  make.example.files(writing.dir=tempdir())
#'  pooldata=popsync2pooldata(sync.file=paste0(tempdir(),"/ex.sync.gz"),poolsizes=rep(50,15))
#'  ##NOTE: This example is just for the sake of illustration as it amounts to
#'  ##interpret read count as allele count which must not be done in practice!
#'  countdata=genobaypass2countdata(genobaypass.file=paste0(tempdir(),"/genobaypass")) 
#'  countdata2genobaypass(countdata=countdata,writing.dir=tempdir())
#' @export
countdata2genobaypass<-function(countdata,writing.dir=getwd(),prefix="",subsamplesize=-1,subsamplingmethod="thinning"){
  if(writing.dir==""){stop("ERROR: Please provide the directory path where to create the files  (e.g., set writing.dir=getwd() to write in the current working directory)")}
  if(!(is.countdata(countdata))) {stop("Data are not formatted as a valid countdata object...")}
  nsnp <- countdata@nsnp
  # Sub-sampling is only activated for sizes strictly greater than 1
  subsampling <- subsamplesize>1
  nsub <- 0
  if(subsampling){
    if(!(subsamplingmethod %in% c("thinning","random"))){stop("subsampling method should either be \"random\" or \"thinning\"")}
    # Fail early with an explicit message: a larger size would give nsub=0 for
    # "thinning" (invalid step in seq()) and an oversized sample() for "random"
    if(subsamplesize>nsnp){
      stop("subsamplesize (",subsamplesize,") should not exceed the number of SNPs in the countdata object (",nsnp,")")
    }
    if(subsamplingmethod=="thinning"){
      # Number of sub-samples, also used as the step between two selected SNPs
      nsub <- floor(nsnp/subsamplesize)
      cat(nsub,"sub-samples of ca.",subsamplesize,"SNPs will be generated by tacking one SNP every",nsub,"\n")
    }else{
      cat("One sub-samples of",subsamplesize,"randomly chosen SNPs will be generated\n")
    }
  }

  # Allele count matrix with two columns per population:
  # odd columns = reference allele counts, even columns = total minus reference
  mat.count <- matrix(0,nsnp,2*countdata@npops)
  ref.col <- 2*seq_len(countdata@npops)-1
  mat.count[,ref.col] <- countdata@refallele.count
  mat.count[,ref.col+1] <- countdata@total.count - countdata@refallele.count

  # Output file names (optionally prefixed)
  if(nchar(prefix)>0){
    outgenofilename <- paste0(writing.dir,"/",prefix,".genobaypass")
    outsnpdetfilename <- paste0(writing.dir,"/",prefix,".snpdet")
  }else{
    outgenofilename <- paste0(writing.dir,"/genobaypass")
    outsnpdetfilename <- paste0(writing.dir,"/snpdet")
  }

  # Write the SNP details and allele counts of the selected SNPs;
  # drop=FALSE keeps a two-dimensional object even if a single SNP is selected
  write.subsample <- function(snp.sel,suffix){
    fwrite(countdata@snp.info[snp.sel,,drop=FALSE],file=paste0(outsnpdetfilename,suffix),sep=" ",col.names=FALSE,row.names=FALSE)
    fwrite(mat.count[snp.sel,,drop=FALSE],file=paste0(outgenofilename,suffix),sep=" ",col.names=FALSE,row.names=FALSE)
  }

  if(!subsampling){
    # No sub-sampling: write all the SNPs
    fwrite(countdata@snp.info,file=outsnpdetfilename,sep=" ",col.names=FALSE,row.names=FALSE)
    fwrite(mat.count,file=outgenofilename,sep=" ",col.names=FALSE,row.names=FALSE)
  }else if(subsamplingmethod=="thinning"){
    # Sub-sample i contains SNPs i, i+nsub, i+2*nsub, ... in the order of the map
    for(i in seq_len(nsub)){
      write.subsample(seq(i,nsnp,nsub),paste0(".sub",i))
    }
  }else{
    # "random": a single sub-sample, SNPs kept in their original order
    write.subsample(sort(sample(1:nsnp,subsamplesize)),".sub")
  }
  invisible(NULL)
}

Try the poolfstat package in your browser

Any scripts or data that you put into this service are public.

poolfstat documentation built on April 4, 2025, 1:49 a.m.