R/datahelperfunction.R

#' Take dataset generated by a sampler an concatenate
#'
#' @export
concatenateData <- function(dataDirectory, data1,data2,force=FALSE) {

  print("use : ... dataDirectory data1 data2 force(set TRUE if you want to remove existing concatenate data)")

  #test if data set can be concatened
  if( (data1$nbIndiv != data2$nbIndiv) ) {

    stop("data1$nbIndiv != data2$nbIndiv")

  }


  #test if data already exist
  outputName = paste( "concatenatedData_",data1$outputName,"_",data2$outputName,sep="" )
  simulationDirectory=paste( dataDirectory,outputName,sep="" )
  resFile = paste( simulationDirectory,"/simulation.res",sep="" )
  if ( file.exists(file = resFile ) && !force ) {
    print("Data already simulated, we return it!!")
    load(resFile)
    return(res)
  }

  #create directory
  system(paste("rm -rf ", simulationDirectory,sep=""))
  system(paste("mkdir ", simulationDirectory,sep=""))

  setwd(paste(simulationDirectory,sep=""))

  ##################
  #Concatenate data#
  ##################

  res = list()

  res$outputName = outputName
  res$outputFile = paste(getwd(),"/",outputName,sep="")
  res$nbIndiv = data1$nbIndiv

  res$nbLocus = data1$nbLocus + data2$nbLocus

  res$genotype = cbind( data1$genotype, data2$genotype )

  res$coord = data1$coord

  res$indData1 = 1:data1$nbLocus

  res$indData2 = data1$nbLocus + 1:data2$nbLocus

  res$tessData = cbind( data1$tessData, data2$tessData[,-(1:2)] )

  #############
  #Export Data#
  #############

  #export .mat format
  write.table(file = paste(outputName,".mat",sep=""),
              res$genotype, row.names = F, col.names = F, quote = F, sep = " ")


  #export .geno format
  write.table(file = paste(outputName,".geno",sep=""),
              t(res$genotype), row.names = F, col.names = F, quote = F, sep = "")


  #export .coord file
  write.table(file = paste(outputName,".coord",sep=""),
              res$coord, row.names = F, col.names = F, quote = F, sep = " ")


  #export .tess format
  write.table(file = paste(outputName,".tess",sep=""),
              res$tessData, row.names = F, col.names = F, quote = F, sep = " ")



  save(res,file = resFile)
  return(res)

}




#' Compute ancestry coeficient from pop allocation
#'
#' @export
QfromPop <- function(pop) {

  nbPop = max(pop)
  Q = matrix(data = 0, nrow = length(pop), ncol = nbPop)
  for (i in 1:nbPop) {
    Q[pop==i,i] = 1
  }
  return(Q)
}

#' Compute allele frequency for each SNIPs in each pop.
#' Returns a matrix of size  nbLocus x nbPop
#'
#' @export
freqInPop <- function(data, pop, ploidy ) {

  nbPop = max(pop)
  nbLocus = ncol(data)

  freq = matrix(data = 0, nrow = nbLocus, ncol = nbPop)
  for (i in 1:nbPop) {
    freq[,i] = apply(data[pop==i,],2,mean)/ploidy
  }
  return(freq)

}
cayek/BioCompToolsR documentation built on May 13, 2019, 1:20 p.m.