inst/example/battenberg_snp6.R

args = commandArgs(TRUE)
TUMOURNAME = toString(args[1])
NORMALCEL = toString(args[2])
TUMOURCEL = toString(args[3])
RUN_DIR = toString(args[4])

NORMALNAME = NA
SKIP_PREPROCESSING = F
SKIP_PHASING = F
library(Battenberg)

###############################################################################
# 2017-10-03
# A pure R Battenberg v2.2.7 SNP6 pipeline implementation.
# sd11@sanger.ac.uk
###############################################################################

# Parallelism parameters
NTHREADS = 8

# General static
IMPUTEINFOFILE = "/lustre/scratch116/casm/team113/sd11/reference/GenomeFiles/battenberg_impute/impute_info.txt"
G1000PREFIX = "/lustre/scratch116/casm/team113/sd11/reference/GenomeFiles/battenberg_1000genomesloci2012/1000genomesAlleles2012_chr"
IMPUTE_EXE = "impute2"

# General SNP6 specific
PROBLEMLOCI = NA
SNP6_REF_INFO_FILE = "/lustre/scratch116/casm/team113/sd11/reference/GenomeFiles/battenberg_snp6/snp6_ref_info_file.txt"
APT_PROBESET_GENOTYPE_EXE = "apt-probeset-genotype"
APT_PROBESET_SUMMARIZE_EXE = "apt-probeset-summarize"
NORM_GENO_CLUST_EXE = "normalize_affy_geno_cluster.pl"
BIRDSEED_REPORT_FILE = "birdseed.report.txt" # No control over the name of this file, as it is automatically generated by APT within cel2baf.logr

# Parameters
PLATFORM_GAMMA = 0.55
PHASING_GAMMA = 1
SEGMENTATION_GAMMA = 10
SEGMENTATIIN_KMIN = 3
PHASING_KMIN = 1
CLONALITY_DIST_METRIC = 0
ASCAT_DIST_METRIC = 1
MIN_PLOIDY = 1.6 #1.6
MAX_PLOIDY = 4.8 #4.8
MIN_RHO = 0.13 #0.1
MAX_RHO = 1.02 #1
MIN_GOODNESS_OF_FIT = 0.63
BALANCED_THRESHOLD = 0.51
MIN_NORMAL_DEPTH = 10
CALC_SEG_BAF_OPTION = 1
HETEROZYGOUSFILTER = "none"

# Change to work directory and load the chromosome information
setwd(RUN_DIR)

battenberg(tumourname=TUMOURNAME, 
           normalname=NORMALNAME, 
           tumour_data_file=TUMOURCEL, 
           normal_data_file=NORMALCEL, 
           imputeinfofile=IMPUTEINFOFILE, 
           g1000prefix=G1000PREFIX, 
           problemloci=PROBLEMLOCI, 
           data_type="snp6",
           impute_exe=IMPUTE_EXE,
           nthreads=NTHREADS,
           platform_gamma=PLATFORM_GAMMA,
           phasing_gamma=PHASING_GAMMA,
           segmentation_gamma=SEGMENTATION_GAMMA,
           segmentation_kmin=SEGMENTATIIN_KMIN,
           phasing_kmin=PHASING_KMIN,
           clonality_dist_metric=CLONALITY_DIST_METRIC,
           ascat_dist_metric=ASCAT_DIST_METRIC,
           min_ploidy=MIN_PLOIDY,
           max_ploidy=MAX_PLOIDY,
           min_rho=MIN_RHO,
           min_goodness=MIN_GOODNESS_OF_FIT,
           uninformative_BAF_threshold=BALANCED_THRESHOLD,
           calc_seg_baf_option=CALC_SEG_BAF_OPTION,
           skip_preprocessing=SKIP_PREPROCESSING,
           skip_phasing=SKIP_PHASING,
           snp6_reference_info_file=SNP6_REF_INFO_FILE, 
           apt.probeset.genotype.exe=APT_PROBESET_GENOTYPE_EXE, 
           apt.probeset.summarize.exe=APT_PROBESET_SUMMARIZE_EXE,
           norm.geno.clust.exe=NORM_GENO_CLUST_EXE, 
           birdseed_report_file=BIRDSEED_REPORT_FILE,
           heterozygousFilter=HETEROZYGOUSFILTER)




# # Change to work directory and load the chromosome information
# setwd(RUN_DIR)
# chrom_names = get.chrom.names(IMPUTEINFOFILE, TRUE)
# 
# # Setup for parallel computing
# clp = makeCluster(NTHREADS)
# registerDoParallel(clp)
# 
# # Extract the LogR and BAF from both tumour and normal cel files.
# cel2baf.logr(normal_cel_file=NORMALCEL, 
#              tumour_cel_file=TUMOURCEL, 
#              output_file=paste(TUMOURNAME, "_lrr_baf.txt", sep=""), 
#              snp6_reference_info_file=SNP6_REF_INFO_FILE, 
#              apt.probeset.genotype.exe=APT_PROBESET_GENOTYPE_EXE, 
#              apt.probeset.summarize.exe=APT_PROBESET_SUMMARIZE_EXE, 
#              norm.geno.clust.exe=NORM_GENO_CLUST_EXE)
# 
# gc.correct(samplename=TUMOURNAME, 
#            infile.logr.baf=paste(TUMOURNAME, "_lrr_baf.txt", sep=""), 
#            outfile.tumor.LogR=paste(TUMOURNAME, "_mutantLogR.tab", sep=""), 
#            outfile.tumor.BAF=paste(TUMOURNAME, "_mutantBAF.tab", sep=""), 
#            outfile.normal.LogR=paste(TUMOURNAME, "_germlineLogR.tab", sep=""), 
#            outfile.normal.BAF=paste(TUMOURNAME, "_germlineBAF.tab", sep=""), 
#            outfile.probeBAF=paste(TUMOURNAME, "_probeBAF.txt", sep=""),
#            snp6_reference_info_file=SNP6_REF_INFO_FILE,
#            birdseed_report_file=BIRDSEED_REPORT_FILE,
#            chr_names=chrom_names)
# 
# # Infer what the gender is
# gender = infer_gender_birdseed(BIRDSEED_REPORT_FILE)
# is_male = gender == "male"
# chrom_names = get.chrom.names(IMPUTEINFOFILE, is_male)
# 
# foreach(chrom=1:length(chrom_names), .export=c("generate.impute.input.snp6","run.impute","combine.impute.output","GetChromosomeBAFs_SNP6","plot.haplotype.data")) %dopar% {
# 
#   # Transform input into a format that Impute2 takes
#   generate.impute.input.snp6(infile.germlineBAF=paste(TUMOURNAME, "_germlineBAF.tab", sep=""), 
#                              infile.tumourBAF=paste(TUMOURNAME, "_mutantBAF.tab", sep=""),
#                              outFileStart=paste(TUMOURNAME, "_impute_input_chr", sep=""), 
#                              chrom=chrom, 
#                              chr_names=chrom_names,
#                              problemLociFile=PROBLEMLOCI, 
#                              snp6_reference_info_file=SNP6_REF_INFO_FILE, 
#                              imputeinfofile=IMPUTEINFOFILE,
#                              is.male=is_male,
#                              heterozygousFilter=HETEROZYGOUSFILTER)
#   
#   # Run impute on the files
#   run.impute(inputfile=paste(TUMOURNAME, "_impute_input_chr", chrom, ".txt", sep=""),
#              outputfile.prefix=paste(TUMOURNAME, "_impute_output_chr", chrom, ".txt", sep=""),
#              is.male=is_male,
#              imputeinfofile=IMPUTEINFOFILE,
#              chrom=chrom,
#              impute.exe=IMPUTE_EXE)
#   
#   # As impute runs in windows across a chromosome we need to assemble the output
#   combine.impute.output(inputfile.prefix=paste(TUMOURNAME, "_impute_output_chr", chrom, ".txt", sep=""),
#                         outputfile=paste(TUMOURNAME, "_impute_output_chr", chrom, "_allHaplotypeInfo.txt", sep=""),
#                         is.male=is_male,
#                         imputeinfofile=IMPUTEINFOFILE,
#                         region.size=5000000,
#                         chrom=chrom)
# 
#   # Transform the impute output into haplotyped BAFs
#   GetChromosomeBAFs_SNP6(chrom=chrom,
#                          alleleFreqFile=paste(TUMOURNAME, "_impute_input_chr", chrom, "_withAlleleFreq.csv", sep=""),
#                          haplotypeFile=paste(TUMOURNAME, "_impute_output_chr", chrom, "_allHaplotypeInfo.txt", sep=""),
#                          samplename=TUMOURNAME,
#                          outputfile=paste(TUMOURNAME, "_chr", chrom, "_heterozygousMutBAFs_haplotyped.txt", sep=""),
#                          chr_names=chrom_names)
#   
#   # Plot what we have until this point
#   plot.haplotype.data(haplotyped.baf.file=paste(TUMOURNAME, "_chr", chrom, "_heterozygousMutBAFs_haplotyped.txt", sep=""),
#                       imageFileName=paste(TUMOURNAME,"_chr",chrom_names[chrom],"_heterozygousData.png",sep=""), 
#                       samplename=TUMOURNAME, 
#                       chrom=chrom, 
#                       chr_names=chrom_names)
#   
#   # Cleanup temp Impute output
#   unlink(paste(TUMOURNAME, "_impute_output_chr", chrom, ".txt_*K.txt*", sep=""))
# }
# 
# # Kill the threads as from here its all single core
# stopCluster(clp)
# 
# # Combine all the BAF output into a single file
# combine.baf.files(inputfile.prefix=paste(TUMOURNAME, "_chr", sep=""), 
#                   inputfile.postfix="_heterozygousMutBAFs_haplotyped.txt", 
#                   outputfile=paste(TUMOURNAME, "_heterozygousMutBAFs_haplotyped.txt", sep=""),
#                   no.chrs=length(chrom_names))
# 
# # Segment the phased and haplotyped BAF data
# # Call segment.baf.phased.sv when SVs are available - not relevant for SNP6 data
# segment.baf.phased(samplename=TUMOURNAME,
#                    inputfile=paste(TUMOURNAME, "_heterozygousMutBAFs_haplotyped.txt", sep=""), 
#                    outputfile=paste(TUMOURNAME, ".BAFsegmented.txt", sep=""),
#                    gamma=SEGMENTATION_GAMMA,
#                    phasegamma=PHASING_GAMMA,
#                    kmin=3,
#                    phasekmin=1,
#                    calc_seg_baf_option=CALC_SEG_BAF_OPTION)
# 
# # Fit a clonal copy number profile
# fit.copy.number(samplename=TUMOURNAME,
#                 outputfile.prefix=paste(TUMOURNAME, "_", sep=""),
#                 inputfile.baf.segmented=paste(TUMOURNAME, ".BAFsegmented.txt", sep=""), 
#                 inputfile.baf=paste(TUMOURNAME,"_mutantBAF.tab", sep=""), 
#                 inputfile.logr=paste(TUMOURNAME,"_mutantLogR.tab", sep=""), 
#                 dist_choice=CLONALITY_DIST_METRIC, 
#                 ascat_dist_choice=ASCAT_DIST_METRIC, 
#                 min.ploidy=MIN_PLOIDY, 
#                 max.ploidy=MAX_PLOIDY, 
#                 min.rho=MIN_RHO, 
#                 max.rho=MAX_RHO,
#                 min.goodness=MIN_GOODNESS_OF_FIT, 
#                 uninformative_BAF_threshold=BALANCED_THRESHOLD, 
#                 gamma_param=PLATFORM_GAMMA, 
#                 use_preset_rho_psi=F, 
#                 preset_rho=NA, 
#                 preset_psi=NA, 
#                 read_depth=30)
# 
# # Go over all segments, determine which segements are a mixture of two states and fit a second CN state
# callSubclones(sample.name=TUMOURNAME, 
#               baf.segmented.file=paste(TUMOURNAME, ".BAFsegmented.txt", sep=""), 
#               logr.file=paste(TUMOURNAME,"_mutantLogR.tab", sep=""), 
#               rho.psi.file=paste(TUMOURNAME, "_rho_and_psi.txt",sep=""), 
#               output.file=paste(TUMOURNAME,"_subclones.txt", sep=""), 
#               output.figures.prefix=paste(TUMOURNAME,"_subclones_chr", sep=""), 
#               output.gw.figures.prefix=paste(TUMOURNAME,"_BattenbergProfile", sep=""),
#               masking_output_file=paste(TUMOURNAME,"_segment_masking_details.txt", sep=""),
#               chr_names=chrom_names, 
#               gamma=PLATFORM_GAMMA, 
#               segmentation.gamma=NA, 
#               siglevel=0.05, 
#               maxdist=0.01, 
#               noperms=1000,
#               max_allowed_state=250, 
#               sv_breakpoints_file="NA",
#               calc_seg_baf_option=CALC_SEG_BAF_OPTION)
# 
# # Make some post-hoc plots
# make_posthoc_plots(samplename=TUMOURNAME, 
#                    logr_file=paste(TUMOURNAME, "_mutantLogR.tab", sep=""), 
#                    subclones_file=paste(TUMOURNAME, "_subclones.txt", sep=""), 
#                    rho_psi_file=paste(TUMOURNAME, "_rho_and_psi.txt", sep=""), 
#                    bafsegmented_file=paste(TUMOURNAME, ".BAFsegmented.txt", sep=""), 
#                    logrsegmented_file=paste(TUMOURNAME, ".logRsegmented.txt", sep=""), 
#                    allelecounts_file=NULL)
# 
# # Save refit suggestions for a future rerun
# cnfit_to_refit_suggestions(samplename=TUMOURNAME, 
#                            subclones_file=paste(TUMOURNAME, "_subclones.txt", sep=""),
#                            rho_psi_file=paste(TUMOURNAME, "_rho_and_psi.txt", sep=""),
#                            gamma_param=PLATFORM_GAMMA)
Wedge-lab/battenberg documentation built on July 31, 2023, 1:32 p.m.