R/utils.R

Defines functions BFDR compressFile write_bed read_ped read_bed getVariances writeBinMat readBinMat

Documented in BFDR getVariances read_bed readBinMat read_ped write_bed

 readBinMat=function(filename,byrow=TRUE,storageMode="double"){
 	## Function to read effects saved by BGLR when ETA[[j]]$saveEffects=TRUE
 	## Also works with files generated by ETA[[j]]$compressEffects=TRUE
    if(!storageMode%in%c("single","double")){
        stop("storageMode can either be 'single' or 'double' (default)")
    }
  	fileIn=gzfile(filename,open='rb')
 	n=readBin(fileIn,n=1,what=numeric(),size=ifelse(storageMode=="single",4,8))
 	p=readBin(fileIn,n=1,what=numeric(),size=ifelse(storageMode=="single",4,8))
 	tmp=readBin(fileIn,n=(n*p),what=numeric(),size=ifelse(storageMode=="single",4,8))
 	X=matrix(data=tmp,nrow=n,ncol=p,byrow=byrow)
 	close(fileIn)
 	return(X)
 }
 
 writeBinMat=function(x,file,byrow=T,storageMode="double"){
    if(!storageMode%in%c("single","double")){
        stop("storageMode can either be 'single' or 'double' (default)")
    }
    n=nrow(x)
    p=ncol(x)
    x=as.vector(x)
    fileOut<-file(file,open='rb')
    writeBin(object=n,con=fileOut,size=ifelse(storageMode=="single",4,8))
    writeBin(object=p,con=fileOut,size=ifelse(storageMode=="single",4,8))
    writeBin(object=x,con=fileOut,size=ifelse(storageMode=="single",4,8))
    close(fileOut)
 }
 
getVariances<-function(X,B,sets,verbose=TRUE)
{

	nSets=length(unique(sets))
	n=nrow(X)
	setLabels=unique(sets)
	nIter=nrow(B)
	VAR=matrix(nrow=nIter,ncol=(nSets+1),NA)
	colnames(VAR)<-c(setLabels,'total')
	XList=list()

	for(i in 1:nSets)
	{
		index<-sets==setLabels[i]
		XList[[i]]<-list(index=index,X=X[,index,drop=F])
	}

	rm(X)

	for(i in 1:nIter)
	{
		
		yHat<-rep(0,n)

		for(j in 1:nSets)
		{
			uHat<-XList[[j]]$X%*%as.vector(B[i,XList[[j]]$index])
			VAR[i,j]<-var(uHat)
			yHat=yHat+uHat
		}

		if(verbose){ cat(' Working iteration',i,'(out of',nIter,')\n')}
		VAR[i,(nSets+1)]<-var(yHat)
	}
	return(VAR)
}
 


#
#Rotines for Plink support
#http://pngu.mgh.harvard.edu/~purcell/plink/
#

#The documentation for the C functions can be found in src/util_plink.c

#This function will read a bed file (binary file for genotypes in plink format)
#Arguments: 
#bed_file: Name for bed file
#bim_file: Name for bim file
#fam_file: Name for fam file
#na_strings: Missing value indicator
#verbose: Logical, it TRUE it will print information generated when reading the bed file.
#it returns a list with 3 components, n: number of individuals, p: number of markers, 
#x, integer vector of dimensions n*p with genotypic information.
#see demo/read_bed.R for an example
read_bed=function(bed_file,bim_file,fam_file,na.strings=c("0","-9"),verbose=FALSE)
{
	#Extended map file (this gives the number of snps)
	bim=read.table(bim_file, comment.char="", as.is=TRUE, na.strings=na.strings)
	snps=as.character(bim[,2])

	#First 6 columns of ped file (this gives the number of individuals)
	fam=read.table(fam_file, comment.char="", as.is=TRUE, na.strings=na.strings)

	n=nrow(fam)
	p=length(snps)

	out=rep(0,n*p)

        if(verbose)
        {
           verbose=1
        }else 
        {   
           verbose=0
        }

	bed_file=path.expand(bed_file)
	out=.C("read_bed_",bed_file,as.integer(n),as.integer(p),as.integer(out),as.integer(verbose))[[4]]
        return(list(n=n,p=p,x=out))
}

#This function will read a ped file
#Note: It assumes that the missing value is 0
#It returns a list with 3 components, n: number of individuals, p: number of markers, 
#x, integer vector of dommensions n*p with genotypic information.
#see demo/read_ped.R for an example
read_ped=function(ped_file)
{
	ped_file=path.expand(ped_file)
	out=.Call("read_ped_",ped_file)
	return(out)
}

#This function will write a bed file (binary file for genotypes in plink format)
#x: vector with genotypic information
#n: number of individuals
#p: number of markers
#bed_file: Output file in bed format
#See demo/write_bed.R for an example
write_bed=function(x,n,p,bed_file)
{
   	#Check inputs

   	if(!is.numeric(x)) stop("x should be an integer and numeric");
   	if(min(x)<0) stop("Supported codes are 0,1,2,3");
   	if(max(x)>3) stop("Supported codes are 0,1,2,3");
   	if(n<=0) stop("n should be bigger than 0");
   	if(p<=0) stop("p should be bigger than 0");
   	if(length(x)!=n*p) stop("length of x is not equal to n*p");
          
	#Function call
	bed_file=path.expand(bed_file)
	.C("write_bed_", bed_file, as.integer(n), as.integer(p), as.integer(x))
}


compressFile=function(pathname,bufferSize=1e6,keep=FALSE){
    conIn=file(pathname,"rb")
    conOut=gzfile(paste0(pathname,".gz"),"wb")
    while(length(bytesRead<-readBin(conIn,"raw",bufferSize))>0){
        writeBin(bytesRead,conOut)
    }
    close(conIn)
    if(!keep){
        unlink(pathname)
    }
    close(conOut)
}

# Compute Bayesian FDR
BFDR=function(prob){
    origNames=names(prob)
    names(prob)=1:length(prob)
    prob=sort(prob,decreasing=TRUE)
    bfdr=1-cumsum(prob)/1:length(prob)
    # Recover the original order and names
    bfdr=bfdr[order(as.integer(names(prob)))]
    names(bfdr)=origNames
    return(bfdr)
}

Try the BGLR package in your browser

Any scripts or data that you put into this service are public.

BGLR documentation built on May 12, 2022, 1:06 a.m.