'test.submodel.valid.phyml' <-
function(inputfile, path_phyml, path_seqgen, model, gamma=TRUE, pinv=FALSE, nbootstrap=100)
{
simfile<-paste(inputfile,".sim",sep="")
if(is.na(match(model,dat.modelcode[,1]))) return("check the model's name. Use HKY instead of HKY85")
modelcode = dat.modelcode[match(model,dat.modelcode[,1]),3:4]
command <- paste(path_phyml, "-b0 -i", inputfile," -m",modelcode[1])
if(modelcode[2]=="unequal") command = paste(command,"-f e")
if(modelcode[2]=="equal") command = paste(command,"-f0.25,0.25,0.25,0.25")
if(gamma==FALSE) command <- paste(command, " -c1")
if(pinv==TRUE) command <- paste(command, "-ve")
a=try(system(command))
if(a==1) return("The model is incorrect. Check the model's name")
fit = parse.phyml(paste(inputfile,"_phyml_stats.txt",sep=""))
if(length(fit$gamma)==0) fit$gamma = 0
if(length(fit$inv)==0) fit$inv = 0
run.seqgen(path_seqgen,
seed = floor(runif(1)*434646+46548778), basefreq = fit$basefreq, rate = fit$rates, seqlength = "100000",
gamma = fit$alpha, inv=fit$inv, treefile=paste(inputfile,"_phyml_tree.txt",sep=""), saveformat = "phylip", outputfile=simfile)
simdata<-read.dna.seq(simfile,format="phylip")
name<-sort(simdata$name)
nspecies<-length(name)
simseq<-simdata$seq
simseq<-tolower(simseq)
simseq<-simseq[match(name,simdata$name),]
simlength<-dim(simseq)[2]
data<-read.dna.seq(inputfile,format="phylip")
seq<-tolower(data$seq)
seq<-seq[match(name,data$name),]
seqlength<-dim(seq)[2]
######################################################
# chisquare-test for base frequencies
######################################################
base.test=matrix(0,nrow=nspecies,ncol=11)
colnames(base.test) = c("species","obs_a","obs_c","obs_g","obs_t","exp_a","exp_c","exp_g","exp_t","teststat","p.value")
base.test[,1] = name
for(i in 1:nspecies)
{
obs<-c(sum(seq[i,]=="a"),sum(seq[i,]=="c"),sum(seq[i,]=="g"),sum(seq[i,]=="t"))
base.test[i,2:5] <- obs
base.test[i,6:9] <- fit$basefreq
result<-chisq.test(obs,p=fit$basefreq,rescale.p=TRUE)
base.test[i,10] <- round(result$statistic,5)
base.test[i,11] <- round(result$p.value,5)
}
print("The Marginal Test for base frequencies is complete!")
######################################################
# chisquare-test for doublet frequencies
######################################################
pair_pvalue<-matrix(0,nspecies,nspecies)
colnames(pair_pvalue) = name
rownames(pair_pvalue) = name
for(m in 1:(nspecies-1))
for(n in (m+1):nspecies)
{
simseq1<-simseq[m,]
simseq2<-simseq[n,]
simpair<-.pairfreq(simseq1,simseq2)
exppair<-simpair/sum(simpair)
obsseq1<-seq[m,]
obsseq2<-seq[n,]
obspair<-.pairfreq(obsseq1,obsseq2)
if(length(which(exppair==0))>0) obspair = obspair[-which(exppair==0)]
if(length(which(exppair==0))>0) exppair = exppair[-which(exppair==0)]
if(sum(obspair)==0){pair_pvalue[m,n]=NA;next;}
pair_pvalue[m,n]<-chisq.test(obspair,p=exppair,rescale.p=TRUE, simulate.p.value=T)$p.value
}
print("The Marginal Test for doublet frequencies is complete!")
######################################################
# chisquare-test for site patterns
######################################################
newseq <- gsub("N", "?", seq)
newseq <- gsub("-", "?", newseq)
newseq <- gsub("O", "?", newseq)
site_pattern <- apply(newseq, 2, function(x) paste(x, collapse = "", sep = ""))
obs_freq <- table(site_pattern)
pattern <- names(obs_freq)
newobs_freq<-obs_freq
new = .patternfreq(pattern, newseq)
obs_pattern_freq <- new$freq
obs_pattern <- new$pattern
exp_pattern_freq = .patternfreq(obs_pattern, simseq)$freq
teststat<-sum(abs(exp_pattern_freq-obs_pattern_freq))
nbootstrap<-nbootstrap
bootstat<-1:nbootstrap
for(j in 1:nbootstrap)
{
bootsample<-simseq[,sample(1:simlength,seqlength)]
freq = .patternfreq(obs_pattern, bootsample)$freq
bootstat[j]<-sum(abs(exp_pattern_freq-freq))
}
sitepattern.pvalue = sum(bootstat>teststat)/nbootstrap
z <- list(base.test=base.test, doublet.pvalue=pair_pvalue, sitepattern.pvalue=sitepattern.pvalue)
z
}
.patternfreq<-function(pattern, seq)
{
nspecies = dim(seq)[1]
newobs_freq = 1:length(pattern)
names(newobs_freq) = pattern
for(i in 1:length(pattern))
{
str<-unlist(strsplit(pattern[i],split=""))
index<-which(str=="?")
if(length(index)==nspecies){
newobs_freq[i] <- NA
next
}
{if(length(index)>0){
newp<-paste(str[-index],collapse="",sep="")
news<-seq[-index,]
}else{
newp<-paste(str,collapse="",sep="")
news<-seq
}}
if(length(index)<nspecies-1) news<-apply(news, 2, function(x) paste(x, collapse = "", sep = ""))
nmissing<-length(grep("\\?",news))
if(nmissing<length(news)*0.8) newobs_freq[i]<-length(grep(newp,news))/(length(news)-nmissing)
else newobs_freq[i] <- NA
}
newobs<-newobs_freq[!is.na(newobs_freq)]
newpattern<-names(newobs)
z <- list(pattern=newpattern, freq=newobs)
z
}
.pairfreq<-function(seq1,seq2)
{
x<-paste(seq1,seq2,sep="")
result<-c(sum(x=="aa"),sum(x=="ac"),sum(x=="ag"),sum(x=="at"),sum(x=="ca"),sum(x=="cc"),sum(x=="cg"),sum(x=="ct"),sum(x=="ga"),sum(x=="gc"),sum(x=="gg"),sum(x=="gt"),sum(x=="ta"),sum(x=="tc"),sum(x=="tg"),sum(x=="tt"))
return(result)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.