Nothing
#' Calculate Intraspecific and Interspecific Variation
#'
#' @description Calculation intraspecific variation (sd) of the potential species theta1, and mean interspecific
#' distance (here, the mean distance between the potential species and its nearest neighbor theta2)
#' (fuzzy-set based method,slightly modified from Zhang et al. 2012). The calculation was done for all species in the
#' reference dataset.
#' @param ref object of class "DNAbin" used as a reference dataset, which contains taxon information.
#' @return a data frame containing intraspecific (sd, theta1) and interspefic variation (mean) of all species,
#' and their corresponding nearest neighbor (NN).
#' @keywords FMFtheta12
#' @export
#' @import stats
#' @import graphics
#' @import utils
#' @import sp
#'
#' @author Ai-bing ZHANG, PhD. CNU, Beijing, CHINA, contact at zhangab2008 (at) mail.cnu.edu.cn.
#' @references
#' Zhang, A. B., C. Muster, H.B. Liang, C.D. Zhu, R. Crozier, P. Wan, J. Feng, R. D. Ward.(2012). A fuzzy-set-theory-based approach
#' to analyse species membership in DNA barcoding. Molecular Ecology, 21(8):1848-63.
#' @examples
#'
#' data(TibetanMoth)
#' ref<-as.DNAbin(as.character(TibetanMoth[1:50,]))
#' FMF.theta12<-FMFtheta12(ref)
#' FMF.theta12
FMFtheta12<-function(ref){
NAMES<-function(seqs){
if(mode(seqs)=="raw"){
SeqNames<-attr(seqs,"dimnames")[[1]]
}else{ ### mode(seqs)=="list"
SeqNames<-names(seqs)
}
#names(SeqNames)<-NULL
if(length(SeqNames)==0) {
stop("the mode(seqs) is wrong!")
}else{
return(SeqNames) }
}
digitize.DNA<-function(seqs){
locus<-toupper(as.character(seqs))
digitized.DNA<-locus
digitized.DNA[digitized.DNA=="A"]<-0.1
digitized.DNA[digitized.DNA=="T"]<-0.2
digitized.DNA[digitized.DNA=="G"]<-0.3
digitized.DNA[digitized.DNA=="C"]<-0.4
digitized.DNA[digitized.DNA=="-"]<-0.5
digitized.DNA[digitized.DNA=="N"]<-0.6
digitized.DNA[digitized.DNA=="R"]<-0
digitized.DNA[digitized.DNA=="Y"]<-0
digitized.DNA[digitized.DNA=="M"]<-0
digitized.DNA[digitized.DNA=="K"]<-0
digitized.DNA[digitized.DNA=="S"]<-0
digitized.DNA[digitized.DNA=="W"]<-0
digitized.DNA[digitized.DNA=="H"]<-0
digitized.DNA[digitized.DNA=="B"]<-0
digitized.DNA[digitized.DNA=="V"]<-0
digitized.DNA[digitized.DNA=="D"]<-0
digitized.DNA<-as.numeric(digitized.DNA)
#digitized.DNA<-as.matrix(digitized.DNA)
digitized.DNA2<-array(digitized.DNA,dim=dim(seqs))
dim(digitized.DNA2)
return(digitized.DNA2)
}
eucl.dist.two.vect<-function(v1,v2){
v1minusv2<-v1-v2
squared.v1minusv2<-v1minusv2*v1minusv2
out.sqrt<-sqrt(sum(squared.v1minusv2))
return(out.sqrt)
}### end of fucntion
### 2.1 dealing with input DNA data!
### 2.2 calculate species center vectors
#morph.spe<-gsub(".+,","",rownames(Ref)) # remove sequence ID before ","
morph.spe<-gsub(".+,","",NAMES(ref)) # remove sequence ID before ","
#no.morph.spe<-length(unique(morph.spe))
ref<-ref[,seg.sites(ref)]
ref<-digitize.DNA(ref)
rownames(ref)<-morph.spe #sampleSpeNames
#species.centers<-aggregate(scale(Ref),by=list(morph.spe),FUN="mean")
species.centers<-aggregate(ref,by=list(morph.spe),FUN="mean")
list.spe<-species.centers[,1]
species.centers<-species.centers[,-1]
### 2.3 seek NN for PS (all species in this case!)
### 2.3.1.calculate pair distance of species centers
units.dist<-dist(species.centers, method = "euclidean", diag = F, upper = T, p = 2)
units.dist0<-units.dist
units.dist<-as.matrix(units.dist) ### important!
#units.dist0<-units.dist
for (i in 1: nrow(units.dist)) {units.dist[i,i]<-NA}
#####
### 2.4. look for elements (their indices) with minimal distance to each other
index1<-numeric(length(unique(morph.spe)))
index2<-index1
min.dist<-index1
for (i in 1:nrow(units.dist)){
# i<-1
index1[i]<-i
b<-which.min(units.dist[i,])
if (length(b)==0)
{index2[i]<-NA
min.dist[i]<-NA}
else {index2[i]<-b
min.dist[i]<-min(units.dist[i,],na.rm=T)}
} ### for loop
pairs<-rbind(index1,index2)
pairs<-t(pairs)
#class(pairs)
pairs<-subset(pairs,subset=!is.na(pairs[,2]))
theta1.tmp<-numeric(length(unique(morph.spe)))
Spp<-morph.spe ### seqsRef$unit.classif
#seqs<-scale(digitized.locus) ### seqsRef$data
uniSpeNames<-unique(Spp)
f<-factor(Spp)
#####
###################################
### popSize calculation
###################################
popSize.PS<-as.numeric(table(Spp))
###################################
### theta1,2 calculation
###################################
### for i<-1
#source("eucl.dist.two.vect.R")
seqInOneSpe<-ref[grep(levels(f)[1], Spp, value = FALSE,fixed = TRUE),]
#dim(seqInOneSpe)==NULL
ifelse(popSize.PS[1]==1,centroid.spe<-seqInOneSpe,centroid.spe<-apply(seqInOneSpe,2,mean)) ###
ifelse(popSize.PS[1]==1,centroid.spe0<-seqInOneSpe,centroid.spe0<-apply(seqInOneSpe,2,mean)) ### centroid.spe0 -
length.sites<-length(centroid.spe)
ifelse(popSize.PS[1]==1,intra.dist<-0,intra.dist<-apply(seqInOneSpe,1,eucl.dist.two.vect,v2=centroid.spe)) ### to the centroid
ifelse(popSize.PS[1]==1,theta1.tmp[1]<-0,theta1.tmp[1]<-3*sd(intra.dist)) ### theta1 is sligthly different from
#ifelse(popSize.PS[1]==1,theta1.tmp[1]<-0,theta1.tmp[1]<-max(intra.dist)) ### theta1 is sligthly different from
for(i in 2:length(levels(f))){
cat(paste("i=",i),"\n")
#seqInOneSpe<-sDNAbin[grep(levels(f)[i], Spp, value = FALSE,fixed = TRUE),]
seqInOneSpe<-ref[grep(levels(f)[i], Spp, value = FALSE,fixed = TRUE),]
ifelse(popSize.PS[i]==1,centroid.spe0<-seqInOneSpe,centroid.spe0<-apply(seqInOneSpe,2,mean))
#ifelse(popSize.PS[i]==1,centroid.spe<-seqInOneSpe,centroid.spe<-apply(seqInOneSpe,2,mean))
ifelse(popSize.PS[i]==1,centroid.spe<-c(centroid.spe,seqInOneSpe),centroid.spe<-c(centroid.spe,apply(seqInOneSpe,2,mean)))
#ifelse(dim(seqInOneSpe)==NULL,theta1.tmp[i]<-0,theta1.tmp[i]<-max(dist(seqInOneSpe))) # error!
#ifelse(popSize.PS[i]==1,theta1.tmp[i]<-0,theta1.tmp[i]<-max(dist(seqInOneSpe)))
ifelse(popSize.PS[i]==1,intra.dist<-eucl.dist.two.vect(seqInOneSpe,centroid.spe0),intra.dist<-apply(seqInOneSpe,1,eucl.dist.two.vect,v2=centroid.spe0))
#intra.dist<-apply(seqInOneSpe,1,eucl.dist.two.vect,v2=centroid.spe0) ### to the centroid
#ifelse(popSize.PS[i]==1,theta1.tmp[i]<-0,theta1.tmp[i]<-max(intra.dist)) ### theta1 is sligthly different from
ifelse(popSize.PS[i]==1,theta1.tmp[i]<-0,theta1.tmp[i]<-3*sd(intra.dist)) ### theta1 is sligthly different from
}
centroid.spe.matrix<-t(array(centroid.spe,dim=c(length.sites,length(centroid.spe)%/%length.sites)))
### 1.2 calculate theta2 for each pairt of species
###################
#codes<-out.somu$out.som.unique$codes ### seqsRef$codes
codes<-centroid.spe.matrix
theta12.all<-data.frame(list.spe=list.spe,PS=pairs[,1],NN=pairs[,2],stringsAsFactors=TRUE)
#source("eucl.dist.two.vect.R")
theta2.tmp<-numeric(dim(pairs)[1])
for (i in 1:dim(pairs)[1]){
v1<-codes[pairs[i,1],]
v2<-codes[pairs[i,2],]
theta2.tmp[i]<-eucl.dist.two.vect(v1,v2)
}
theta12.all$theta1<-with(theta12.all,theta1.tmp)
theta12.all$theta2<-with(theta12.all,theta2.tmp)
theta12.all$popSize.PS<-with(theta12.all,popSize.PS)
return(theta12.all)
} ### the end of the function
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.