R/bayhapFreq.R
In BayHap: Bayesian analysis of haplotype association using Markov Chain Monte Carlo

`bayhapFreq` <-
function(data, na.snp.action = "omit", snps.name = NULL,
                 col.snps = NULL, sep = "/", total.iter.haplo = 10000,
                 devhaplo = 0.1, dist = 1, lag.number = 10, sign =
                 0.05, file = FALSE, verbose = 2)




{
       burn.in<-5000
       type.pheno<-0
       freqmin<-0
       devlogit<-0
       type.model<-0
       varcont<-0
       ncovars<-0
       covariates.vec<-0
       covars.name<-"a"
       inters<-0
       intcovars<-0
       prior<-0
       mu<-0
       varp<-0
       burn.in.pheno<-100
       #1000
       total.iter<-100
       #total.iter.haplo 
       covar<-FALSE
    

    
    if (!is.data.frame(data)) stop("Data is not a data.frame object. \n")
    if (sum(apply(data,2,is.factor))>0) stop("Some data columns are factors. \n")
    if ((freqmin>1)||(freqmin<0)) stop("freqmin is negative or higher than 1 \n")
    if (devhaplo<0) stop("devhaplo value is negative \n")
    if (lag.number>total.iter.haplo) stop("lag number must to be lower than number of iters \n")
    if (sign>1) stop("sign value is out of the interval (0,1) \n")
    if ((length(col.snps)!=0 & length(col.snps)==1)||(length(snps.name)!=0 & length(snps.name)==1)){ 
         stop("There are an unique SNP, is not possible to compute haplotype frequencies. \n")
    }
    
    
    if (length(snps.name)!=0) geno<-data[,names(data)%in%snps.name]
    if (length(col.snps)!=0){
        geno<-data[,col.snps]
        snps.name<-names(data)[col.snps]
    }
    if ((length(snps.name)==0)&(length(col.snps)==0)) snps.name<-names(data)
      
    if (na.snp.action=="omit"){
        geno<-geno[(apply(apply(geno,2,is.na),1,sum)==0),]  
        miss<-0
    }else{
        if (na.snp.action=="keep"){
    geno<-geno[apply(geno,1,FUN=function(x) sum(is.na(x))!=ncol(geno)),]
    miss<-ifelse(sum(apply(apply(as.data.frame(geno),2,is.na),1,sum)==0)==ncol(geno),0,1)
        }else{
           stop("na.snp.action value is not correct. \n")
        }
    } 
    if (nrow(geno)==0) 
        stop("After apply na.geno.action there are not enough rows with non missing values to perform the analyse.\n")

   
    is.snp(geno,sep) 
   

    nindiv<-nrow(geno)
    nlocus<-ncol(geno)
    pheno<-rep(0,nindiv)
    time<-rep(0,nindiv)

    mat<-NULL

    alleles_mat<-NULL

    alleles<-NULL

    
    dades<-geno 
    numindivexp<-nindiv
    weights<-rep(1,nindiv)
  
    j<-1

    i<-1

    for (j in 1:nlocus)

        {

                geno<-genotype(dades[,j],sep=sep)

                a<-allele(geno) 

                alleles<- sort(attr(a,"allele.names"))

                if (length(alleles)==1) alleles<-c(alleles,alleles)

                alleles_mat<-rbind(alleles_mat,alleles)

                a[is.na(a)]<-"8"

                a[a==alleles[1]]<-"0"

                a[a==alleles[2]]<-"1"

                mat<-cbind(mat,a)

        } 

    mat<-apply(mat,2,as.numeric)

    
    cat("Computing haplotype pairs. \n")

    pairs<-haplo.pairs(mat)
    n.pairs.indiv<-pairs[[1]]    
    pairs<-as.numeric(unlist(strsplit(unlist(pairs[2]),split="-")))
    pairs.m<-matrix(pairs,byrow=TRUE,ncol=2)
    codi.haplo.cromo1<-pairs.m[,1]
    codi.haplo.cromo2<-pairs.m[,2]


       

    genotypes.vec<-c(t(mat))

    genotypes.vec<-as.numeric(genotypes.vec)

  
    id.outhaplo<-paste(gsub("[./: ]","",format(Sys.time(), "%d/%m/%y %H:%M:%OS3")),paste(sample(LETTERS)[1:10],collapse=""),sep="")

    res.c <- .C("mcmc",numindiv=as.integer(nindiv),

               numindivexp=as.integer(numindivexp),

               weights=as.double(weights),

               numlocus=as.integer(nlocus),

               burn.in=as.integer(burn.in),

               burn.in.haplo=as.integer(total.iter.haplo),

               burn.in.pheno=as.integer(burn.in.pheno),

               total.iter=as.integer(total.iter),

               genotypes=as.integer(0),

               phenotype=as.integer(pheno),

               max_haplo=as.integer(1000),

               distribution=as.integer(dist),

               type.pheno=as.integer(type.pheno),

               freqmin=as.double(freqmin),

               devhaplo=as.double(devhaplo),

               devlogit=as.double(devlogit),

               lag.number=as.integer(lag.number),

               time_var=as.double(time),

               mem_error=as.integer(0),

               res_freq=as.double(numeric(2^nlocus)),

               res_se_freq=as.double(numeric(2^nlocus)),

               res_coeff=as.double(numeric(2^nlocus+ncovars+2^nlocus*ncovars+1)),

               res_se_coeff=as.double(numeric(2^nlocus+ncovars+2^nlocus*ncovars+1)),

               haplos_int=as.double(numeric(2^nlocus)),

               llista_haplos=as.integer(numeric((2^nlocus)*nlocus)),

               n_coeff=as.integer(0),

               type_model=as.integer(type.model),

               var_cont=as.double(varcont),

               num_covar=as.integer(ncovars),

               covariates=as.double(covariates.vec),

               interactions=as.integer(inters),

               int_covars=as.integer(intcovars),

               prior=as.integer(prior),

               mu=as.double(mu),

               varp=as.double(varp),
               hap1_R=as.integer(codi.haplo.cromo1),
               hap2_R=as.integer(codi.haplo.cromo2),
               n_pairs=as.integer(n.pairs.indiv),
               verbose=as.integer(verbose),
               versem_max=as.double(0),
               onlyfreq=as.integer(1),
               id_outhaplo=as.character(id.outhaplo)

               )


        if (res.c$mem_error==1) stop("Memory error due to number of subjects.")

        if (res.c$mem_error==2) stop("Memory error due to number of locus.") 
 
        unlink(paste("outhaplo",id.outhaplo,".txt",sep=""))
        unlink("a.txt")
        unlink("b.txt")
        mat.iters<-import(paste("outhaplo2",id.outhaplo,".txt",sep=""))  
        if (file==FALSE)  unlink(paste("outhaplo2",id.outhaplo,".txt",sep="")) 
        
        res.c$llista_haplos<-haplo.list.all(nlocus)
        r<-list(res.c,sign,sep,snps.name,alleles_mat,mat.iters)                                

class(r)<-"freq"
return(r)

}

Any scripts or data that you put into this service are public.

BayHap documentation built on May 2, 2019, 12:44 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

BayHap
Bayesian analysis of haplotype association using Markov Chain Monte Carlo

R/bayhapFreq.R
In BayHap: Bayesian analysis of haplotype association using Markov Chain Monte Carlo

Try the BayHap package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

BayHap Bayesian analysis of haplotype association using Markov Chain Monte Carlo

R/bayhapFreq.R In BayHap: Bayesian analysis of haplotype association using Markov Chain Monte Carlo

Try the BayHap package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

BayHap
Bayesian analysis of haplotype association using Markov Chain Monte Carlo

R/bayhapFreq.R
In BayHap: Bayesian analysis of haplotype association using Markov Chain Monte Carlo