#' Tajima_D function
#'
#' @param genome_matrix: a matrix of 0,1's. Each row is an individual. Each column is a polymorphism in the sampled population.
#'
#' @return scalar value of Tajima's D
#' @export
#'
#' @examples Tajima_D(genome_matrix)
##### R Version
Tajima_D<-function(s){
return((theta_t(s)-theta_w(s))/(v_theta(s)^0.5))
}
#theta_t gives number of pairwise differences, normalised by the number of pairs
theta_t <- function(genome_matrix){
dim_genomes<-dim(genome_matrix) #grab dimensions of genome matrix.
sample_size<-dim_genomes[1] #getting the number of genomes
#get symmetric manhattan distance matrix
dist_matrix=as.matrix(dist(genome_matrix,method="manhattan"))
#sum up elements of matrix and divide by 2 (symmetric matrix)
pairwise_diff=sum(dist_matrix)*0.5
#normalize by number of possiblem pairs
return (pairwise_diff/(sample_size*(sample_size-1)/2))
}
#theta_w is the expected number of segregating sites
theta_w<-function(s){
sample_size<-dim(s)[1] #get the number of genomes
segsites<-dim(s)[2]
#compute the normalizing constant, derived analytically by Tajima. Add reference later!!!
normalise=0 #initialisation of normalisation constant
for (i in 1:(sample_size-1)){
normalise=normalise+1/i
}
return ((segsites)/(normalise))
}
#variance of theta was derived analytically by Tajima
v_theta<-function(sequence){
n=dim(sequence)[2] #get number of samples
S=dim(sequence)[1] #get num of seg sites
#initialisation
a1=0
a2=0
for (i in 1:(n-1)){
a1=a1+1/i
}
for (i in 1:(n-1)){
a2=a2+(1/i^2)
}
b1=(n+1)/(3*(n-1))
b2=2*(n^2+n+3)/(9*n*(n-1))
c1=b1-(1/a1)
c2=b2-(n+2)/(a1*n)+(a2/a1^2)
e1=c1/a1
e2=c2/(a1^2+a2)
return=(e1*S+e2*S*(S-1))
}
################# end R version
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.