R/intersect.R

Defines functions barcodeSuperset union.list intersect.list union intersect incIntersect deBarcode exc2incIntersect exclusiveIntersect0 intersectElements enumerateIntersecSizes jaccard

Documented in deBarcode intersect intersectElements jaccard union

#Utility to do set analysis
#Author: Minghui Wang, minghui.wang@mssm.edu
#Date: 20 July, 2014

#
jaccard=function(x){
	if(!is.list(x)) stop('Input x must be list\n')
	nL=length(x)
	if(nL<2) stop('Input x should have at least two entries\n')
	x=lapply(x,unique)
	Mat=matrix(NA,nL,nL)
	colnames(Mat)=rownames(Mat)=names(x)
	diag(Mat)=1
	for(i in 1:(nL-1)){
		for(j in (i+1):nL) Mat[i,j]=Mat[j,i]=sum(x[[i]] %in% x[[j]])/length(union(x[[i]],x[[j]]))
	}
	Mat
}
#
enumerateIntersecSizes=function(x,degree=NULL){
	if(!is.null(degree)) return(incIntersect(x,degree))
	otab=exclusiveIntersect0(x)
	exc2incIntersect(otab)
}
#list all possible intersections
intersectElements=function(x,mutual.exclusive=TRUE){
#return Venn diagram entry sizes
#x: a list of sets
	if(!is.list(x)) stop('Input x must be list\n')
	nL=length(x)
	if(nL<2) stop('Input x should have at least two entries\n')
	allE=unique(unlist(x))
	barcodes=rep('',length(allE))
	for(i in 1:nL){
		barcodes=paste(barcodes,ifelse(allE %in% x[[i]],'1','0'),sep='')
	}
	Res=data.frame(Entry=allE,barcode=barcodes,stringsAsFactors=FALSE)
	if(mutual.exclusive) return(Res)
	Res=do.call(rbind,lapply(split(Res,Res$barcode),function(z){
		s=barcodeSuperset(z$barcode[1])
		cbind(Entry=rep(z$Entry,length(s)),barcode=rep(s,each=nrow(z)))
	}))
	data.frame(Res,stringsAsFactors=FALSE)
}
#enumerate all overlap and non-overlap set sizes
exclusiveIntersect0=function(x){
#x: a list of sets
#return mutual exclusive intersection sizes including empty intersections
	intersects=intersectElements(x)
	nL=length(x)
	barcodes=mkBarcode(nL)
	otab=sapply(barcodes,function(a) 0)
	tab=table(intersects$barcode)
	otab[names(tab)]=tab
	otab
}
exc2incIntersect=function(x){
#x, an object generated from function exclusiveIntersect0
#return inclusive subset sizes
	otab=x
	otab[]=0
	C1 = lapply(strsplit(names(x),''), function(c11) c11 == '1')
	for(i in 1:length(x)){
		a=C1[[i]]
		rel=sapply(C1,function(b) all(a[b]==TRUE))
		otab[rel]=otab[rel]+x[i]
	}
	otab
}
#reverse barcode
deBarcode <- function(barcode,setnames,collapse=' & '){
	sapply(barcode,function(b,setnames,collapse=' & '){
		s=setnames[strsplit(b,'')[[1]] == '1']
		s=paste(s,collapse=collapse)
		s
	},setnames=setnames,collapse=collapse)
}
#compute intersection sizes for given overlap degree
incIntersect=function(x,degree=NULL){
#x is a list of sets
	if(!is.list(x)) stop('Input x must be list\n')
	nL=length(x)
	if(nL<2) stop('Input x should have at least two entries\n')
	allE=unique(unlist(x))
	nE=length(allE)
	BinMat=sapply(x, function(d) allE %in% d )
	barcodes=mkBarcode.degree(nL,degree)
	otab=sapply(barcodes,function(a) 0)
	for(i in 1:length(otab)){
		i1=which(strsplit(names(otab)[i],'')[[1]] == '1')
		otab[i]=sum(rowSums(BinMat[,i1,drop=FALSE]) == length(i1))
	}
	otab
}
#
intersect=function(x,y,...){
	dat=list(x,y,...)
	if(length(dat)<2) return(unlist(dat))
	common=as.vector(dat[[1]])
    for(i in 2:length(dat)){
		common=unique(common[match(as.vector(dat[[i]]), common, 0L)])
		if(length(common)==0) break
	}
	common
}
union=function(x,y,...){
	dat=list(x,y,...)
	if(length(dat)<2) return(unlist(dat))
	u=as.vector(dat[[1]])
    for(i in 2:length(dat)){
		u=unique(c(u,as.vector(dat[[i]])))
	}
	u
}
intersect.list=function(x){
	if(! is.list(x)) stop('Input must be a list\n')
	if(length(x)<2) return(unlist(x))
	common=as.vector(x[[1]])
    for(i in 2:length(x)){
		common=unique(common[match(as.vector(x[[i]]), common, 0L)])
		if(length(common)==0) break
	}
	common
}
union.list=function(x){
	if(! is.list(x)) stop('Input must be a list\n')
	if(length(x)<2) return(unlist(x))
	u=as.vector(x[[1]])
    for(i in 2:length(x)){
		u=unique(c(u,as.vector(x[[i]])))
	}
	u
}
barcodeSuperset=function(x){
	x1=strsplit(x,'')[[1]]
	ii=which(x1 == '1')
	if( length(ii) <= 1) return(x)
	b=cbind(x1)
	for(i in ii){
		b1=b
		b1[i,]='0'
		b=cbind(b,b1)
	}
	colnames(b)=NULL
	b=b[,- ncol(b),drop=FALSE]
	sort(apply(b,2,paste,collapse=''))
}

Try the SuperExactTest package in your browser

Any scripts or data that you put into this service are public.

SuperExactTest documentation built on March 23, 2022, 5:07 p.m.