R/observed.coexp.sumstat.R

Defines functions observed.coexp.sumstat

Documented in observed.coexp.sumstat

#' Observed codemographic hippersummary statistics calculation
#' @description This function calculates the hippersummary statistics for a collection of fasta files present in a particular folder. It is instended to be used for codemographic inference in conjunction with simulations generated by sim.coexp or sim.coexp2. Here the same stats are calculated.
#' @param path.to.fasta The path to the directory containing the fasta files for all species/populations.
#' @author Marcelo Gehara
#' @return A list of hippersummary statistics.
#' @references Gehara M., Garda A.A., Werneck F.P., Oliveira E.F., da Fonseca E.M., Camurugi F., Magalhães F. de M., Lanna F.M., Sites J.W., Marques R., Silveira-Filho R., São Pedro V.A., Colli G.R., Costa G.C., & Burbrink F.T. (2017) Estimating synchronous demographic changes across populations using hABC and its application for a herpetological community from northeastern Brazil. Molecular Ecology, 26, 4756–4771.
#' @references Chan Y.L., Schanzenbach D., & Hickerson M.J. (2014) Detecting concerted demographic response across community assemblages using hierarchical approximate Bayesian computation. Molecular Biology and Evolution, 31, 2501–2515.
#' @export
observed.coexp.sumstat<-function(path.to.fasta){
  setwd(path.to.fasta)
  fasta.files<-list.files()
  fasta.files<-fasta.files[grep(".fas",fasta.files)]

  ms.output<-fasta2ms(path.to.fasta,fasta.files,write.file=F)
  bp.length<-list()
  ss<-list()
  for(i in 1:length(ms.output)){
    fas<-read.dna(file=fasta.files[i], format="fasta")
    bp.length[[i]]<-ncol(fas)
    ss[[i]]<-as.numeric(strsplit(ms.output[[i]][3]," ")[[1]][2])
  }

  sum.stat<-NULL
  for (j in 1:length(ms.output)){
    x<-ms.to.DNAbin(ms.output = ms.output[[j]],bp.length = bp.length[[j]])

    pi<-pegas::nuc.div(x)
    H<-H.div(x)[2]
    TD<-pegas::tajima.test(x)$D

    #sfs<-site.spectrum(x,folded=T)
    #sfs<-sfs/sum(na.omit(sfs))
    #sfs<-sfs[1:3]

    SS<-c(pi[[1]],ss[[j]],H,TD[1])
    sum.stat<-rbind(sum.stat,SS)
    }

vari<-diag(var(sum.stat))
means<-colMeans(sum.stat,na.rm=T)
skew<-NULL
kur<-NULL
for(u in 1:ncol(sum.stat)){
  s<-skewness(sum.stat[,u])
  skew<-c(skew,s)
  k<-kurtosis(sum.stat[,u])
  kur<-c(kur,k)
}

h.s<-c(vari[1],means[1],skew[1],kur[1],
       vari[2],means[2],skew[2],kur[2],
       vari[3],means[3],skew[3],kur[3],
       vari[4],means[4],skew[4],kur[4])

names(h.s)<-c("var.pi","mean.pi","skew.pi","kur.pi",
              "var.ss","mean.ss","skew.ss","kur.ss",
              "var.H","mean.H","skew.H","kur.H",
              "var.TD","mean.TD","skew.TD","kur.TD")
#write.table(t(h.s),file="h.sum.stat.txt",col.names = F, row.names=F, append=T,sep="\t")
return(h.s)
}
gehara/PipeMaster documentation built on April 19, 2024, 8:14 a.m.