# R/exporting.R

####
#
#	Functions intended for exporting and/or re-formatting data within pipelines
#	Teemu Daniel Laajala
#
####

# Export rCGH objects to be processed using GISTIC 2.0 in order to harmonize data with cBioPortal CNAs
# 
# From GISTIC 2.0 documentation 
# (URL1: https://cbioportal.readthedocs.io/en/latest/Data-Loading-Tips-and-Best-Practices.html )
# (URL2: ftp://ftp.broadinstitute.org/pub/GISTIC2.0/GISTICDocumentation_standalone.htm )
# (URL3: https://cloud.genepattern.org/gp/pages/login.jsf )
# -> Conflicting information on input files required?
#
# The tab-delimited input segmentation file requires six columns.
# The column headers are:
#(1)  Sample           (sample name)
#(2)  Chromosome  (chromosome number)
#(3)  Start Position  (segment start position, in bases)
#(4)  End Position   (segment end position, in bases)
#(5)  Num markers      (number of markers in segment)
#(6)  Seg.CN       (log2() -1 of copy number)
# Combine the segmentation tables of several rCGH objects into one
# tab-delimited segmentation file in the six-column layout used as GISTIC 2.0 input.
#
# Arguments:
#	x	A non-empty list of rCGH objects (any class extending "rCGH", e.g. "rCGH-Agilent")
#		that have already been segmented, so that getSegTable() returns a table with the
#		columns ID, chrom, loc.start, loc.end, num.mark and seg.mean.
#	file	Path of the output file.
#
# Side effects:
#	Writes 'file' with the header Sample, Chrom, Start, Stop, NumMark, Seg.CN and one row
#	per segment; the Sample column holds the "sampleName" entry of each object's info slot.
#
# Value:
#	The result of try(): on failure the error message is printed and a "try-error" object
#	is returned invisibly instead of the error being raised.
exportGISTIC <- function(
	x, # Should be a list of rCGH-objects for which rCGH:segmentCGH has been run, then segmentation file extracted using getSegTable
	file = "inputGISTIC.tsv" # Output file name for GISTIC 2.0
){
	# Errors are reported through try() rather than propagated, so a failed export
	# does not abort the pipeline that called this function
	try({
		# '||' short-circuits: the elements are only inspected once x is known to be a list,
		# so a single (non-list) rCGH object yields the intended message instead of a coercion error.
		# is(z, "rCGH") accepts every class that extends the rCGH superclass, including "rCGH-Agilent".
		if(!inherits(x, "list") || !all(vapply(x, FUN=function(z) methods::is(z, "rCGH"), FUN.VALUE=logical(1)))){
			stop("Input should be a list of rCGH-objects")
		}
		# An empty list would otherwise fail later with an obscure attribute error on NULL
		if(length(x) == 0){
			stop("Input list is empty; there are no segments to export")
		}
		outputs <- lapply(x, FUN=function(z) {
			# Keep only the six columns needed for the segmentation file, in the required order
			tmp <- getSegTable(z)[,c("ID", "chrom", "loc.start", "loc.end", "num.mark", "seg.mean"), drop=FALSE]
			# Label every segment with the sample name stored in the object's info slot
			tmp[,"ID"] <- z@info["sampleName"]
			tmp
		})
		# Stack the per-sample tables into a single table
		output <- do.call("rbind", outputs)
		colnames(output) <- c("Sample", "Chrom", "Start", "Stop", "NumMark", "Seg.CN")
		# quote=FALSE keeps literal double quotes out of the header and the sample names
		write.table(output, file=file, sep="\t", quote=FALSE, row.names=FALSE, col.names=TRUE)
	})
}
# Syksy/curatedTools documentation built on May 27, 2019, 9:55 a.m.