R/pooldata2genoselestim.R

Defines functions pooldata2genoselestim

Documented in pooldata2genoselestim

#' Convert a pooldata object into SelEstim input files.
#' @description Convert a pooldata object into SelEstim allele read count. A file containing SNP details is also printed out. Options to generate sub-samples  (e.g., for large number of SNPs) are also available.
#' @param pooldata A pooldata object containing Pool-Seq information (see \code{\link{vcf2pooldata}} and \code{\link{popsync2pooldata}})
#' @param writing.dir Directory where to create the files  (e.g., set writing.dir=getwd() to copy in the current working directory)
#' @param prefix Prefix used for output file names
#' @param subsamplesize Size of the sub-samples. If <=1 (default), all the SNPs are considered in the output
#' @param subsamplingmethod If sub-sampling is activated (argument subsamplesize), define the method used for subsampling that might be either i) "random" (A single data set consisting of randmly chosen SNPs is generated) or ii) "thinning", sub-samples are generated by taking SNPs one every nsub=floor(nsnp/subsamplesize) in the order of the map (a suffix ".subn" is added to each sub-sample files where n varies from 1 to nsub).
#' @return Files containing allele count (in SelEstim Pool-Seq format) and SNP details (as in the snp.info matrix from the pooldata object)
#' @seealso To generate pooldata object, see \code{\link{vcf2pooldata}}, \code{\link{popsync2pooldata}}
#' @examples
#'  make.example.files(writing.dir=tempdir())
#'  pooldata=popsync2pooldata(sync.file=paste0(tempdir(),"/ex.sync.gz"),poolsizes=rep(50,15))
#'  pooldata2genoselestim(pooldata=pooldata,writing.dir=tempdir())
#' @export
pooldata2genoselestim=function(pooldata,writing.dir=getwd(),prefix="",subsamplesize=-1,subsamplingmethod="thinning"){
  if(writing.dir==""){stop("ERROR: Please provide the directory path where to copy the example files  (e.g., set writing.dir=getwd() to copy in the current working directory)")}
  if(!(is.pooldata(pooldata))) {stop("Data are not formatted as a valid pooldata object...")}
  subsampling=FALSE
  if(subsamplesize>1){
   if(!(subsamplingmethod %in% c("thinning","random"))){stop("subsampling method should either be \"random\" or \"thinning\"")}
   if(subsamplingmethod=="thinning"){
     tmp.n=floor(pooldata@nsnp/subsamplesize)
     cat(tmp.n,"sub-samples of ca.",subsamplesize,"SNPs will be generated by tacking one SNP every",tmp.n,"\n")
   }else{
     cat("One sub-samples of",subsamplesize,"randomly chosen SNPs will be generated\n")
   }
  subsampling=TRUE  
  }

  mat.count=matrix(0,pooldata@nsnp,2*pooldata@npools)
  tmp.id=2*(1:pooldata@npools)-1
  mat.count[,tmp.id]=pooldata@refallele.readcount
  mat.count[,(tmp.id+1)]=pooldata@readcoverage - pooldata@refallele.readcount
  outgenofilename=paste0(writing.dir,"/genoselestim")
  outsnpdetfilename=paste0(writing.dir,"/snpdet")
  if(nchar(prefix)>0){
    outgenofilename=paste0(writing.dir,"/",prefix,".genoselestim")
    outsnpdetfilename=paste0(writing.dir,"/",prefix,".snpdet")
  }
  if(subsampling){
    if(subsamplingmethod=="thinning"){
      tmp.n=floor(pooldata@nsnp/subsamplesize)
      for(i in 1:tmp.n){
        tmp.sel=seq(i,pooldata@nsnp,tmp.n)
        write.table(file=paste0(outsnpdetfilename,".sub",i),pooldata@snp.info[tmp.sel,],quote=F,col.names=F,row.names=F) 
        tmp.genofilename=paste0(outgenofilename,".sub",i)
        cat(file=tmp.genofilename,pooldata@npools,"\n",length(tmp.sel),"\n",sep="")
        write.table(file=tmp.genofilename,t(pooldata@poolsizes),quote=F,col.names=F,row.names=F,append=T)
        write.table(file=tmp.genofilename,mat.count[tmp.sel,],quote=F,col.names=F,row.names=F,append=T) 
      }
    }
    if(subsamplingmethod=="random"){
      tmp.sel=sort(sample(1:pooldata@nsnp,subsamplesize))
      write.table(file=outsnpdetfilename,pooldata@snp.info[tmp.sel,],quote=F,col.names=F,row.names=F) 
      cat(file=outgenofilename,pooldata@npools,"\n",subsamplesize,"\n",sep="")
      write.table(file=outgenofilename,t(pooldata@poolsizes),quote=F,col.names=F,row.names=F,append=T)
      write.table(file=outgenofilename,mat.count[tmp.sel,],quote=F,col.names=F,row.names=F,append=T) 
      }
  }else{
   write.table(file=outsnpdetfilename,pooldata@snp.info,quote=F,col.names=F,row.names=F) 
   cat(file=outgenofilename,pooldata@npools,"\n",pooldata@nsnp,"\n",sep="")
   write.table(file=outgenofilename,t(pooldata@poolsizes),quote=F,col.names=F,row.names=F,append=T)
   write.table(file=outgenofilename,mat.count,quote=F,col.names=F,row.names=F,append=T) 
  }
}

Try the poolfstat package in your browser

Any scripts or data that you put into this service are public.

poolfstat documentation built on Sept. 8, 2023, 5:49 p.m.