R/pseudo_counts.R

"pseudo_counts" <-
function(profile1){

## This will calculate the pseudocounts based on
## f(a,i) = n(a,i) + E*(Sumj n(j,i)*Matrix(j,a))      
##           -------------------------------------
##           n(i) + E
## Where E is the number of pseudocounts to add
## This is the Heinikoff method 
## profile1 is generated by create_profile()
 

## Create the probability matrix
probab<-create_probab()
len<-dim(profile1)[2]

## Sequence Count, assuming at least one ungapped column.
sequence_count<-max(apply(profile1,2,sum)) 

profile_result<-profile1

for(k in 1:len){
    for(j in 1:20){
        sum_col<-0
        sequence_count<-sum(profile1[,k])
        ## Bc=m*Rc where Rc is the number of different residues
        counts <- 5*sum(profile1[,k]>0) 
        if(sequence_count > 0){
            total<-0
            for(i in 1:20){
               sum_col<-sum_col+(profile1[i,k]/sequence_count)*(probab[i,j]/(sum(probab[,i])))
               total<-total+(probab[i,j]/(sum(probab[,i])))
            }    
        }
   profile_result[j,k] <- (profile1[j,k]+(counts*sum_col))/(sequence_count+counts)
   }
}

## Return the number of extra counts for each amino acid,
return(profile_result)


}

Try the bgafun package in your browser

Any scripts or data that you put into this service are public.

bgafun documentation built on April 28, 2020, 7:56 p.m.