R/fccac.R
In fCCAC: functional Canonical Correlation Analysis to evaluate Covariance between nucleic acid sequencing datasets

Documented in fccac

                 
fccac <- function(peaks, bigwigs, labels, splines=10, nbins=100, ncan=5 , tf=c(), main="", bar=NULL, outFiles=FALSE ){    
		
	#Read genomic regions in BED format with Bioconductor package genomation
	print("Reading peaks...")
	peaks <- readGeneric(file=peaks, chr=1, start=2, end=3)  #[1:100]  #readBed
	  
	if (ncan > splines | ncan > length(peaks) ){ print("ncan should not be higher than the number of splines or peaks. Please lower the value of ncan.")   }

	if (ncan <= splines | ncan <= length(peaks)){
		
		print("Starting fCCAC...")

		SPLINES = splines   #
		L = nbins           #bp

		#Modify labels to detect replicates (assume replicates are ordered 1,2,...)
		d <- duplicated(labels)
		new_labels <- rep("newLabel", length(labels) )       #c()
		for (l in seq(from=1, to=length(labels), by=1)  ) {
			if (d[l] == FALSE) {  counter <- 1; new_labels[l]  <- paste(labels[l], 1, sep="_Rep") }
			if (d[l] == TRUE)  {  counter <- counter +1;  new_labels[l]  <- paste(labels[l], counter, sep="_Rep")  } 	
		
		}
	
	
		#Create B-spline basis system (cubic, order 4) of the signals before FDA:
		bspl <- create.bspline.basis(rangeval=c(-L/2,L/2), nbasis=SPLINES, norder=4)   # Cubic B-splines
		argvalsBS <- seq(from= (-L/2), to=(L/2), by=1)      #(-L/2):(L/2)

		
		#Read bigWig Files 
		fdaData <- lapply(seq(from=1, to=length(bigwigs), by=1)  , function(x) matrix(NA, nrow=length(peaks) , ncol=L+1 ))      #list()

		for (i in seq(from=1, to=length(bigwigs), by=1)   ) {
			print(paste(c("Reading bigWig file...",i,"/",length(bigwigs)),collapse="")  )

			fdamatrix <- matrix(0.0, ncol=L+1, nrow= length(peaks) ) 
			fdamatrix  <- ScoreMatrixBin(target = bigwigs[i], bin.num = L+1, windows = peaks, type="bigWig",rpm=FALSE, strand.aware = TRUE, bin.op="max" )
			fdaData[[i]] <- Data2fd(y=t(fdamatrix), argvals= argvalsBS, basisobj=bspl) 			

		}
		rm(fdamatrix)
		#length(fdaData)


		

		co <- combn(x=c(new_labels), m=2)  	# all possible pairwise combinations


		# Select Sample of Interest (if any)
				
		#if(length(tf)==1){
		#	tf_co<-c()
		#	for (j in seq(from=1, to=ncol(co), by=1)  ){
		# 		if ( length( grep(pattern=tf, x=co[,j]) ) >0  ) tf_co <- c(tf_co, j)
		#	}
		#	co <- co[,tf_co]
		#}
		if(length(tf)==1){
    			idx <- unique(col(co)[grepl(tf, co)])
    			co <- co[, idx] 
  		}
	
			
		#Prepare Data for functional CCA
			
		#x <- list()             # list to store output of cca.fd
		scc <- rep(0, ncan*ncol(co) )  #c()  		# squared Canon. Corr.
		sccM <- rep(0, ncan*ncol(co) ) #c() 		# MAX squared Canon. Corr.
		pair <- rep("pair", ncan*ncol(co) ) #c() 		# labeling
		Spair <- rep("Spair", ncol(co) ) #c() 				# labeling for S
			
		w <-  1/ ( seq(from=1, to=ncan, by=1)  )
		Ma <-  sum(w) 		# maximum possible value for w
		S <- rep(0, ncol(co) )  		# weighted sums
		x <- lapply(seq(from=1, to=ncol(co), by=1)  , function(x) NA) #x <- list()    # list to store output of cca.fd

			
		for (i in seq(from=1, to=ncol(co), by=1)  ) {
			print(paste(c("Performing fCCA in pair...",i,"/",ncol(co)) ,collapse="") )
		
			file1 <- which(new_labels==co[1,i])
			file2 <- which(new_labels==co[2,i])
		
			print(paste(c(new_labels[file1],"...vs...",new_labels[file2]) , collapse="") )
	
			x[[i]] = cca.fd(fdobj1=fdaData[[file1]], fdobj2=fdaData[[file2]], ncan = ncan, ccafdPar1=fdPar(bspl, 0, 0), ccafdPar2=fdPar(bspl,0, 0), centerfns=TRUE)
	
		}
		#NCAN calculated in 'cca.fd' is always the number of splines
		#plot.cca.fd(x)


		for (i in seq(from=1, to=ncol(co), by=1)  ) {
	
			posit <- (1+(ncan*(i-1))):(ncan*(i-1+1) )
			
			scc[posit] <-  x[[i]]$ccacorr[ seq(from=1, to=ncan, by=1) ]                         #all canonical correlations
			sccM[posit] <-  rep( max (x[[i]]$ccacorr[ seq(from=1, to=ncan, by=1) ]), ncan)    #max of all canonical correlations

			file1 <- which(new_labels==co[1,i])
			file2 <- which(new_labels==co[2,i])
			pair[posit] <-  rep( paste(new_labels[c(file1,file2)]  , collapse="_vs_") ,ncan)  
			Spair[i] <- paste(new_labels[c(file1,file2)], collapse="_vs_" ) 

			#calculate weigthed sum
			S[i] <- sum(w* x[[i]]$ccacorr[ seq(from=1, to=ncan, by=1) ]    )
			
			rm(posit, file1, file2)

		}


		# if ( is.null(bar) == TRUE ) {bar <- ncol(co) }  ##barplot(100* S/Ma, ylim= c(0,100)  )


		#Colormap
		#colfunc <- colorRampPalette(c("cyan","blue", "black","darkred","red" ,"green" )) 
		#colfunc <- colorRampPalette(c("#132B43","#56B1F7"))
		colfunc <- colorRampPalette(brewer.pal(10, "Spectral"))

		#Plots

		#ggplot2
		ggData <- data.frame(scc = scc, pair = pair, variables = rep( seq(from=1, to=ncan, by=1) , ncol(co)), sccM = sccM)	
		# head (ggData ) 
		ggData <- transform(ggData,  pair = reorder(pair, sccM))
		#head (ggData ) 
		p1 <- ggplot(ggData, aes(x=variables, y=scc, group=pair))  + geom_line(aes(colour = pair)) + ylim(0,1)  + theme_bw() + theme(panel.border = element_rect( colour = "black")) +   theme(legend.position='none', plot.title = element_text(lineheight=.8, face="bold")) + xlab("Canonical variable (k)") + ylab( expression(Squared~canonical~correlation~(R[k] ^{2}) ) )  +   geom_point(size=0.9, shape=5, aes(colour=pair)) + scale_x_continuous(breaks=seq(from=1, to=ncan, by=1)  ) + scale_colour_manual(values = colfunc(ncol(co))) + ggtitle(main)  
		# select 1st canonical correlation
		ggData <- subset(ggData, variables==1 )
		#head (ggData ) 
		#sort and assign colors
		ggData <- ggData[sort(ggData$sccM, index.return=TRUE, decreasing=TRUE)[[2]],]
		ggData$CL <- rev( colfunc(ncol(co))  )
		#head (ggData ) 
		
		
		ggData2 <- data.frame(S = 100* (S/Ma), pair = Spair  )   
		#head (ggData2 ) 
		#ggData2 <- transform(ggData2,  pair = reorder(pair, S))
		ggData2 <- ggData2[sort(ggData2$S, index.return=TRUE, decreasing=TRUE)[[2]],]
		#head (ggData2 ) 
		ggData3 <- merge(x=ggData2, y=ggData, by='pair',  sort = FALSE)
		#ggData3 <- ggData3[sort(ggData3$S, index.return=T, decreasing=T)[[2]],]
		#head (ggData3 ) 
		
		ggDataTXT <- ggData3
			
		
		INB = is.null(bar)
		if ( INB == TRUE ) {   bar <- ncol(co); CHOSEN <- seq(from=1, to=bar, by=1)  ;  ggData3 <- ggData3[CHOSEN,]  }
		if ( INB != TRUE ) {  CHOSEN<-c(seq(from=1, to=bar[1], by=1),  (length(ggData3$CL)-bar[2]+1):length(ggData3$CL)); ggData3 <- ggData3[  CHOSEN , ]	}
		
		p2 <-  ggplot(ggData3, aes(x = reorder(factor(pair),S), y = S)) + geom_bar(stat = "identity",width=.6, fill=reorder(rev(as.character(ggData3$CL)) ,ggData3$S) )+ coord_flip() + ylim(0,100)  + theme_bw() + theme(panel.border = element_rect( colour = "black")) +  theme(legend.position='none', text = element_text(size=10)   )  + ylab("F(%)") + xlab("") + geom_hline(yintercept = 100, colour="red", linetype = "longdash")
		



		colnames(ggDataTXT) <- c("samples","F","squ_can_corr_k_1","k","squ_can_corr_k_1","color"  )
		fccac_out <- ggDataTXT[,c(1,2,3,4,6)]
		
		if (outFiles == TRUE){
			print("Saving fCCAC.pdf...")
			pdf("fCCAC.pdf", height=6, width=3.5)	
			multiplot(p1, p2, cols=1)
			dev.off()
		
			print("Saving fCCAC.txt...")
			write.table(x=fccac_out , file = "fCCAC.txt", append = FALSE, quote = FALSE, sep = "\t", row.names = FALSE, col.names = TRUE)
	
			print("Done...")
		}

		if (outFiles == FALSE){
			multiplot(p1, p2, cols=1)
		}		
	
			
		return( fccac_out )
	
	}
		
}

Any scripts or data that you put into this service are public.

fCCAC documentation built on Nov. 8, 2020, 7:48 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

fCCAC
functional Canonical Correlation Analysis to evaluate Covariance between nucleic acid sequencing datasets

R/fccac.R
In fCCAC: functional Canonical Correlation Analysis to evaluate Covariance between nucleic acid sequencing datasets

Defines functions fccac

Documented in fccac

Try the fCCAC package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

fCCAC functional Canonical Correlation Analysis to evaluate Covariance between nucleic acid sequencing datasets

R/fccac.R In fCCAC: functional Canonical Correlation Analysis to evaluate Covariance between nucleic acid sequencing datasets

Defines functions fccac

Documented in fccac

Try the fCCAC package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

fCCAC
functional Canonical Correlation Analysis to evaluate Covariance between nucleic acid sequencing datasets

R/fccac.R
In fCCAC: functional Canonical Correlation Analysis to evaluate Covariance between nucleic acid sequencing datasets