R/Linarius.R

Defines functions .onAttach allele.count allele.frec allele.hetero datagen zerodet BjIC Boot.BjIC dist.pop distGPS allele.varience allele.Fst

Documented in allele.count allele.frec allele.hetero datagen distGPS dist.pop

#############################################################################################################################################################################
###                                                                            Linarius.R                                                                                ####
#############################################################################################################################################################################

  .onAttach <- function(...) {
packageStartupMessage("This is NOT a free software, not reading the licence is a violation of the licence, by continuing using it, you are considered aware of the terms of Licence.l2k and accepting them") 
}


#Alleles frequency & heterozygocy 
#' @title  Function to count allele presence and absence of dominant markers according to ploidy levels
#'
#' @description This function aims at counting allele presence and absence in a ballance population, this considering the ploidy level.
#' 
#' @param xx A binary datafram of genotypes, individuals as row and alleles as column 
#' @param ploidy Interger or vector, ploidy level of the population or individuals 
#' 
#'@examples 
#' data(Birch)
#' allele.count(Betula,code$Ploidy)
#' 

allele.count<- function(xx,ploidy=2) {
plolev<-sort(unique(ploidy))
NULL->alco
NULL->noms
for(i in plolev) cbind(alco,apply(as.matrix(xx[ploidy==i,])==0,2,sum),apply(as.matrix(xx[ploidy==i,])==1,2,sum))->alco
for(j in plolev) noms<-c(noms,paste(j,"x-absence", sep='',collapse=''),paste(j,"x-presence",sep='', collapse=''))
colnames(alco)<-noms
return(alco)
}
#############################################################################################################################################################################
#sep ploidy al frec
#' @title  Calculation of Allele hetherozygocy with Dominant markers and mixed ploidy levels
#'
#' @description This function aims at evaluating allele hetherozygocy in a ballance population, this considering the ploidy level.  Then It calculated an weighted avarage
#' 
#' @param xx A binary datafram of genotypes, individuals as row and alleles as column 
#' @param ploidy Interger or vector, ploidy level of the population or individuals 
#' 
#' @note There is a little biais on small sized sample.
#' 
#' @examples 
#' data(Birch)
#' allele.frec(Betula,code$Ploidy)
#' 
allele.frec<-function(xx,ploidy=2) {
plolev<-sort(unique(ploidy))
NULL->alco
NULL->noms
NULL->glob
for(i in plolev) cbind(alco,(1-(apply(as.matrix(xx[ploidy==i,])==0,2,sum)/(apply(as.matrix(xx[ploidy==i,])==0,2,sum)+apply(as.matrix(xx[ploidy==i,])==1,2,sum)))^(1/i))*100)->alco
#for(i in plolev) cbind(alco, colMeans(xx[ploidy == i, ]^(1/i)*100) -> alco
for(j in plolev) noms<-c(noms,paste(j,"x-frequence(%)", sep="",collapse=""))
colnames(alco)<-noms
for(k in plolev) glob<-c(glob,nrow(xx[ploidy==k,]))
cbind(alco,apply(alco*glob/sum(glob),1,sum))->alco
colnames(alco)<-c(noms,"Frequency")
return(alco)
}
#############################################################################################################################################################################
#sep ploidy al frec 
#' @title  Calculation of Allele frequency with Dominant markers and mixed ploidy levels
#'
#' @description This function aims at evaluating allele frequency in a ballance population, this considering the ploidy level. Then It calculated an weighted avarage
#' 
#' @param xx A binary datafram of genotypes, individuals as row and alleles as column 
#' @param ploidy Interger or vector, ploidy level of the population or individuals 
#' @note There is a little biais on small sized sample.
#' 
#'@examples 
#' data(Birch)
#' allele.hetero(Betula,code$Ploidy)
#' 
allele.hetero<-function(xx,ploidy=2) {
plolev<-sort(unique(ploidy))
NULL->alco
NULL->noms
for(i in plolev) cbind(alco,100*(1-((1-(apply(as.matrix(xx[ploidy==i,])==0,2,sum)/(apply(as.matrix(xx[ploidy==i,])==0,2,sum)+apply(as.matrix(xx[ploidy==i,])==1,2,sum)))^(1/i))^i + (apply(as.matrix(xx[ploidy==i,])==0,2,sum)/(apply(as.matrix(xx[ploidy==i,])==0,2,sum)+apply(as.matrix(xx[ploidy==i,])==1,2,sum))))))->alco
for(j in plolev) noms<-c(noms,paste(j,"x-heterozygocy(%)", sep="",collapse=""))
colnames(alco)<-noms
return(alco)
}
#############################################################################################################################################################################
#' @title Generate a fake dataset fitting allele frequencies and ploidies. 
#'
#' @description 
#' This function generates a random binary datafram with in row: simulated individuals, and in colomn: alleles. 
#' The aim of this function is to test hypothesis, and shall not be used for publishing imaginary results (See licence for more information). 
#' @param frec Allele frequencies: a vector providing the allele frequency of all alleles, every single frequency have to be between 0 and 1
#' @param ploidy Ploidy level of every individuals: a vector providing ploidy level of every single individual. Normally integers. 
#' 
#' @note 
#' Whenever you make something out of this function you have to precise that there are generated data. 
#' You shall not use this for getting fake experimental data (it constitues a violation of this package licence ).
#' Whenever you use that function for bad purposes I will find you, get you and kick your ass! 

datagen<-function(frec,ploidy) {
fakedata<-matrix(data = NA, nrow = length(ploidy), ncol = length(frec), byrow = FALSE,  dimnames = NULL)
plolev<-unique(ploidy)
lingen<-function(xx){
	matrix(data = NA, nrow = length(ploidy), ncol = 1, byrow = FALSE,  dimnames = NULL)->allfrec
	 for(i in plolev)
	sample(c(0,1),length(ploidy[ploidy==i]),replace=T,prob=c((1-xx)^i,1-((1-xx)^i)))->allfrec[ploidy==i,]
	return(allfrec)
	}
for(j in 1:length(frec)) lingen(frec[j])-> fakedata[,j]
	return(fakedata)
}

#############################################################################################################################################################################
#Counting 
zerodet<- function(X,factor,PV=5,ID=1) {
	#clean NA
X<-X[!is.na(factor),]
factor<-factor[!is.na(factor)]
    # Count
rbind(colSums(X[factor==ID,]),colSums(X[factor!=ID,]),sum(factor==ID)-colSums(X[factor==ID,]),sum(factor!=ID)-colSums(X[factor!=ID,]))->Contingence
rownames(Contingence)<-c("Pres.in","Pres.out","Abs.in","Abs.out")
apply(Contingence,2, min)->limit
Contingence[,limit<=PV]

}
#############################################################################################################################################################################
#Identity of population 
BjIC<-function(XX,factor,ploidy=2) { 
	allele.frec(XX,ploidy=ploidy)->frec_all
	frec_all<-(frec_all[,ncol(frec_all)])
	levels<-sort(unique(factor))
	0->BIC_VALUE
	for(i in levels){
		
		allele.frec(XX[factor==i,],ploidy=ploidy[factor==i])->BT
		BT<-BT[,ncol(BT)]
BIC_VALUE<-BIC_VALUE+(BT-frec_all)^2
	} 
	BIC_VALUE<-sum(BIC_VALUE)
return(BIC_VALUE)	
	
	}
	#############################################################################################################################################################################
#boot of BIC
Boot.BjIC<-function(XX,factor,ploidy,nboot=100) {
	boot<-vector(mode = "numeric", length = nboot)
	len<-(1:nboot)
	allele.frec(XX,ploidy=ploidy)->frec_all
	frec_all<-(frec_all[,ncol(frec_all)])
	for(i in len) {
		datagen(frec=frec_all/100,ploidy=ploidy)->fake
		colnames(fake)<-names(XX)
		BIC(fake,factor=factor,ploidy=ploidy)->boot[i]
		#Bet <- as.genclone(df2genind(fake, ind.names = row.names(gelplo), type = "PA"))
#sethierarchy(Bet) <- codeplo
#setpop(Bet) <- ~Ploidy
#res <- poppr.amova(Bet, hier = ~Ploidy)
#res[1:4]$componentsofcovariance[1,2]->boot[i]	
}
return(boot)
}

	#############################################################################################################################################################################
#' @title Coloring terminal branch of trees
#'   
#'
#' @description Generate colors for terminal edge branch of a phylogenic tree
#' 
#' @param phy A tree of class phylo
#' @param groups A vector indicating what should be of the same color
#' @param color A vector indicating the color to choose
#' @param root.color A single color, to use for non terminal edge 
#' @note This function should be used with the package ade4 
#' @seealso ade4
#' @examples 
#' data(Birch)
#' require(vegan)
#' require(ade4)
#' require(ape)
#' dist=vegdist(Betula, method="jaccard", binary=TRUE, diag=FALSE, upper=FALSE, na.rm = FALSE)
#' dendro=hclust(dist,"complete")
#' dendro2<-as.phylo(dendro,cex=0.5)
#' plot(dendro2,,type="u",lab4ut="axial",font=1, adj=1,label.offset = 0.01,edge.color = col.edge.phylo(dendro2,code$Ploidy),tip.color = code$Ploidy)




col.edge.phylo<-function (phy,groups,color=c(2,3,4,5),root.color=1)
{
	#col.edge<-rep(root.color,length(phy$tip))
	col.edge<-rep(root.color,length(phy$edge[,1]))
	cbind(phy$edge,col.edge)->Edge
	cbind(Edge,(1:length(phy$edge[,1])))-> Edge
	(Edge[order(as.integer(Edge[,2]), decreasing=F),])->Edge
	groups->Edge[1:length(phy$tip),3]
	(Edge[order(as.integer(Edge[,4]), decreasing=F),])->Edge
	return(Edge[,3])
}
	
	#############################################################################################################################################################################
#' @title Compute genetic distances between populations 
#'   
#'
#' @description Returns a dist object with genetic distences between populations. 4 indexes are availlable: "Euclidean", "Reynolds", "Roger" and "Nei".
#' 
#' @param xx A binary datafram of genotypes, individuals as row and alleles as column 
#' @param pop A vector informing of population every sample belongs to 
#' @param method A method for computing genetic distence between populations, can be any unambigous abreviation of "euclidean", "reynolds", "roger" or "nei"
#' @param ploidy Interger or vector, ploidy level of the population or individuals 
#' @examples 
#' data(Birch)
#' dist.pop(Betula, code$Pop,ploidy=code$Ploidy,method="rey") 

	
	dist.pop<-function(xx,pop, method="reynolds" ,ploidy=2)
{
	if (length(ploidy) == 1) 
        ploidy <- rep(ploidy, length(rownames(xx)))
	if (length(rownames(xx)) != length(ploidy)) 
        stop("Mismatch of number of samples and number of ploidy ID")
	if (!is.na(pmatch(method, "euclidian"))) 	
	if (length(rownames(xx)) != length(pop)) 
        stop("Mismatch of number of samples and number of population ID")
	if (!is.na(pmatch(method, "euclidian"))) 
        method <- "euclidean"
    METHODS <- c("euclidean", "reynolds", "roger", "nei")
    method <- pmatch(method, METHODS)
    if (is.na(method)) 
        stop("invalid distance method")
    if (method == -1) 
        stop("ambiguous distance method")
        FUN<-c(Euclide.dist,Reynolds.dist,Roger.dist,Nei.dist)[[method]]
if (method >= 5) 
      stop("Huston, we've got a problem")   
	pop.lev<-unique(pop)
  dMat <- matrix(0,ncol=length(pop.lev),nrow=length(pop.lev))
  colnames(dMat) <- pop.lev -> rownames(dMat)
  
  for(i in 1:(length(pop.lev)-1))
  {
    for(j in (i+1):length(pop.lev))
    {
      a <- FUN(xx[pop==pop.lev[i],],xx[pop==pop.lev[j],], ploidy[pop==pop.lev[i]], ploidy[pop==pop.lev[j]])
      dMat[i,j] <- a
      dMat[j,i] <- a
     # return(ploidy[pop==pop.lev[j]])
    }
  }
  dMat<-as.dist(dMat)
return(dMat)
}

	#############################################################################################################################################################################
#' @title Compute distance between GPS points 
#'   
#'
#' @description Returns a dist object with  distences between location in km 
#' 
#' @param lon A vector with longitudes 
#' @param lat A vector with latitudes
#' @param names A vector with names of position
#' @param model A model for computing distence between places, can be any unambigous abreviation of "lambert", "spherical" or "potato"
#' @examples 
#' data(Birch)
#' distGPS(code$Lat,code$Long,rownames(code))
distGPS <-function(lat,lon,names,model="lambert")
{
		if (length(lat) != length(lon)) 
        stop("Latitudes and Longitudes difer in size")
        if (length(lat) != length(names)) 
        stop("Incorect number of names")
 #Methode
     METHODS <- c("lambert", "spherical", "potato")
    model <- pmatch(model, METHODS)
    if (is.na(model)) 
        stop("invalid distance method")
    if (model == -1) 
        stop("ambiguous distance method")
FUN<-c(dist.gps.Lambert,dist.gps.cst,dist.gps.var)[[model]]
if (model >= 4) 
      stop("Huston, we've got a problem") 
#Computation             
	  dMat <- matrix(0,ncol=length(lon),nrow=length(lon))
  colnames(dMat) <- names
  rownames(dMat) <- names
  for(i in 1:(length(lon)-1))
  {
    for(j in (i+1):length(lon))
    {
      a <- FUN(lat[i],lon[i],lat[j],lon[j])
      if((lat[i] == lat[j])&&(lon[i]==lon[j])) a<-0
      dMat[i,j] <- a
      dMat[j,i] <- a
    }
  }
dMat<-as.dist(dMat)  
return(dMat)
}

#############################################################################################################################################################################
#' @title Compute variences of allele frequency among populations 
#'   
#'
#' @description Returns a matrix with allele frequencies in all populations and variances of allele frequencies  
#' 
#' @param xx A binary datafram of genotypes, individuals as row and alleles as column 
#' @param pop A vector informing of population every sample belongs to 
#' @param ploidy Interger or vector, ploidy level of the population or individuals 
#' @examples 
#' data(Birch)
#' allele.varience(Betula,ploidy=code$Ploidy,pop=code$Pop)


allele.varience<- function(xx,ploidy=2,pop) {
		if (length(ploidy) == 1) 
        ploidy <- rep(ploidy, length(rownames(xx)))
        if (length(unique(pop)) == 1) 
        stop("Fatal error: Mmmm… User too stupid… Hint: To compute variances among populations, you need several populations")
	if (length(rownames(xx)) != length(ploidy)) 
        stop("Mismatch of number of samples and number of ploidy ID")
	if (length(rownames(xx)) != length(pop)) 
        stop("Mismatch of number of samples and number of population ID")
poplev<-sort(unique(pop))
##ssfunction
allele.frec.pop<-function(xx, ploidy =ploidy,poplist,popname) {
plolev<-sort(unique(ploidy))
NULL->varience
xx<-xx[poplist==popname,]
ploidy<-ploidy[poplist==popname]
allele.frec(xx,ploidy)-> varience
return(varience[,ncol(varience)])
}
##end ssfunction
NULL->variences
NULL->noms
for(i in poplev) cbind(variences,as.matrix(allele.frec.pop(xx,ploidy,poplist=pop,popname=i)))-> variences
for(j in poplev) noms<-c(noms,j)
#colnames(variences)<-noms
length(noms)->n
variences<-variences/100
cbind(variences,(n*(apply(variences,1,sd))/(n-1))^2)-> variences
colnames(variences)<-c(noms,"Variances")
return(variences)
}

#############################################################################################################################################################################
#' @title Compute Fst 
#'   
#'
#' @description Returns a matrix with allele frequencies in all populations and variances of allele frequencies  
#' 
#' @param xx A binary datafram of genotypes, individuals as row and alleles as column 
#' @param pop A vector informing of population every sample belongs to 
#' @param ploidy Interger or vector, ploidy level of the population or individuals 
#' @examples 
#' data(Birch)
#' allele.varience(Betula,ploidy=code$Ploidy,pop=code$Pop)


allele.Fst<- function(xx,ploidy=2,pop) {
		if (length(ploidy) == 1) 
        ploidy <- rep(ploidy, length(rownames(xx)))
        if (length(unique(pop)) == 1) 
        stop("Fatal error: Mmmm… User too stupid… Hint: To compute Fst, you need several populations")
	if (length(rownames(xx)) != length(ploidy)) 
        stop("Mismatch of number of samples and number of ploidy ID")
	if (length(rownames(xx)) != length(pop)) 
        stop("Mismatch of number of samples and number of population ID")
poplev<-sort(unique(pop))

NULL->FST
cbind(as.matrix(allele.frec(xx,ploidy))[,ncol(allele.frec(xx,ploidy))],as.matrix(allele.varience(xx,ploidy,pop))[,ncol(allele.varience(xx,ploidy,pop))])-> FST
cbind(FST,FST[,2]/((FST[,1]/100)*(100-FST[,2])/100))->FST
colnames(FST)<-c("Frequency (%)","Variances","Fst")
return(FST)
}
giby/Linarius documentation built on May 17, 2019, 4:20 a.m.