#' IchorCNA implementation for Capture Panel Data
#'
#'
#' @param cnr [REQUIRED] Path to tumour CNR file.
#' @param normal [OPTIONAL] Path to normal samples. Default c(0.5,0.6,0.7,0.8,0.9)
#' @param ploidy [OPTIONAL] Initial tumour ploidy; can be more than one value if additional ploidy initializations are desired. Default: 2
#' @param lambda [OPTIONAL] Initial Student's t precision; must contain 4 values (e.g. c(1500,1500,1500,1500)); if not provided then will automatically use based on variance of data
#' @param scStates [OPTIONAL] Subclonal states to consider. Default NULL
#' @param output_name [OPTIONAL] Name for the output. If not given the name of the first tumour sample of the samples will be used.
#' @param lambdaScaleHyperParam [OPTIONAL] Hyperparameter (scale) for Gamma prior on Student's-t precision. Default 3
#' @param maxCN [OPTIONAL] Total clonal states. Default 7.
#' @param estimateNormal [OPTIONAL] Estimate normal. Default TRUE.
#' @param estimateScPrevalence [OPTIONAL] Estimate subclonal prevalence. Default TRUE.
#' @param estimatePloidy [OPTIONAL] Estimate tumour ploidy. Default TRUE.
#' @param maxFracGenomeSubclone [OPTIONAL] Exclude solutions with subclonal genome fraction greater than this value. Default 0.5
#' @param maxFracCNASubclone [OPTIONAL] Exclude solutions with fraction of subclonal events greater than this value. Default 0.7
#' @param minSegmentBins [OPTIONAL] Minimum number of bins for largest segment threshold required to estimate tumor fraction; if below this threshold, then will be assigned zero tumor fraction
#' @param altFracThreshold [OPTIONAL] Minimum proportion of bins altered required to estimate tumor fraction; if below this threshold, then will be assigned zero tumor fraction. Default: [0.05]
#' @param includeHOMD [OPTIONAL] If FALSE, then exclude HOMD state. Useful when using large bins (e.g. 1Mb). Default FALSE.
#' @param txnE [OPTIONAL] Self-transition probability. Increase to decrease number of segments. Default: [0.9999999]
#' @param txnStrength [OPTIONAL] Transition pseudo-counts. Exponent should be the same as the number of decimal places of --txnE. Default: [1e+07]
#' @param plotFileType [OPTIONAL] File format for output plots. Default pdf
#' @param plotYLim [OPTIONAL] ylim to use for chromosome plots. Default: [c(-2,2)]
#' @param output_dir [OPTIONAL] Path to the output directory.
#' @param threads [OPTIONAL] Number of threads to split the work. Default 4
#' @param ram [OPTIONAL] RAM memory to asing to each thread. Default 4
#' @param verbose [OPTIONAL] Enables progress messages. Default False.
#' @param mode [REQUIRED] Where to parallelize. Default local. Options ["local","batch"]
#' @param executor_id Task EXECUTOR ID. Default "recalCovariates"
#' @param task_name Task name. Default "recalCovariates"
#' @param time [OPTIONAL] If batch mode. Max run time per job. Default "48:0:0"
#' @param update_time [OPTIONAL] If batch mode. Job update time in seconds. Default 60.
#' @param wait [OPTIONAL] If batch mode wait for batch to finish. Default FALSE
#' @param hold [OPTIONAL] HOld job until job is finished. Job ID.
#' @export
ichor_capture=function(
rdata=NULL,
selected=NULL,
cnr="TRAILS_TR033_K180006_BULK_CAPTURE_PCF_V3_HG19_LB1_650_HCGJ3DMXY_1.cnr",
normal=0.5,
ploidy=2,
maxCN=7,
includeHOMD=FALSE,
scStates=NULL,
txnE=0.9999999,
txnStrength=1e7,
output_name="",
output_dir=".",
min_cov=-15,
lambda=NULL,
coverage=NULL,
minSegmentBins=50,
minTumFracToCorrect=0.01,
chrs=c(1:22,"X"),
chrTrain=c(1:22),
gender="male",
maxFracCNASubclone=0.7,
maxFracGenomeSubclone=0.5,
lambdaScaleHyperParam=3,
altFracThreshold=0.05,
estimateNormal=TRUE,
estimatePloidy=TRUE,
estimateScPrevalence=FALSE,
plotYLim=c(-2,2),
plotFileType="pdf",
verbose=FALSE,
batch_config=build_default_preprocess_config(),
threads=4,ram=4,mode="local",
executor_id=make_unique_id("hybridIchorCNA"),
task_name="hybridIchorCNA",time="48:0:0",
update_time=60,
wait=FALSE,hold=NULL
){
options(scipen=0, stringsAsFactors=F)
options(stringsAsFactors=FALSE)
options(bitmapType='cairo')
argg <- as.list(environment())
if(!is.null(rdata)){
load(rdata)
if(!is.null(selected)){
cnr=cnr_list[selected]
}
}
id=""
if(output_name!=""){
id=output_name
}else{
id=get_file_name(cnr)
}
out_file_dir=set_dir(dir=output_dir,name=paste0("ichor_capture/",id))
tmp_dir=set_dir(dir=out_file_dir,name="tmp")
task_id=make_unique_id(task_name)
job=build_job(executor_id=executor_id,task_id=task_id)
outImage <- paste0(out_file_dir,"/",id,".RData")
names(cnr)=Vectorize(get_file_name)(cnr)
numSamples <- length(cnr)
read_tumour_copy=function(file="R/TRAILS_TR067_I260038_BULK_CAPTURE_PCF_V2_HG19_LB1_411_HW57JDMXX_1.cnr",
header=TRUE,sep="\t"){
tumour_copy=read.table(file=file,header=header,sep=sep)
output <- GenomicRanges::GRanges(ranges = IRanges::IRanges(start = tumour_copy$start,
width = tumour_copy$end-tumour_copy$start),
seqnames = tumour_copy$chromosome,
log2 = tumour_copy$log2,
gene = tumour_copy$gene,
depth = tumour_copy$depth,
weight = tumour_copy$weight)
message(paste0("Dropped ",length(output[!as.character(GenomeInfoDb::seqnames(output)) %in% chrs,])," bins with omitted chromosomes."))
return(output[as.character(GenomeInfoDb::seqnames(output)) %in% chrs,])
}
drop_low_coverage=function(tumour_copy,min_cov=-15){
message(paste0(sum( tumour_copy[[1]]$log2<min_cov | tumour_copy[[1]]$depth==0),
" bins excluded from analysis due to low coverage"))
return(!tumour_copy[[1]]$log2<min_cov | tumour_copy[[1]]$depth==0)
}
drop_weight=function(tumour_copy,min_weight=0){
message(paste0(sum(tumour_copy[[1]]$weight<min_weight),
" bins excluded from analysis due to low weight"))
return(!tumour_copy[[1]]$weight<min_weight)
}
drop_outliers=function(tumour_copy,width=50,factor=10,partial=1){
out=as.data.frame(tumour_copy[[1]]) %>% dplyr::group_by(seqnames) %>%
dplyr::mutate(smooth_log2=abs(log2-pracma::savgol(log2,
ifelse(width%%2==0,width+1,width)))) %>%
dplyr::mutate(quant=as.vector(zoo::rollapply(smooth_log2, width = width,
FUN = "quantile", p = .95,fill=FALSE,partial=partial))) %>%
dplyr::mutate(filter=abs(smooth_log2)>(quant*factor))
message(paste0("Dropped ",sum(out$filter)," outlier bins"))
return(!out$filter)
}
tumour_copy<-lapply(cnr,FUN=read_tumour_copy)
low_coverage=drop_low_coverage(tumour_copy)
weight=drop_weight(tumour_copy)
outliers=drop_outliers(tumour_copy)
valid <- as.character(GenomeInfoDb::seqnames(tumour_copy[[1]])) %in% chrs
valid <- valid & low_coverage & weight & outliers
chrInd <- as.character(GenomeInfoDb::seqnames(tumour_copy[[1]])) %in% chrTrain
### RUN HMM ###
## store the results for different normal and ploidy solutions ##
ptmTotalSolutions <- proc.time() # start total timer
results <- list()
loglik <- as.data.frame(matrix(NA, nrow = length(normal) * length(ploidy), ncol = 7,
dimnames = list(c(), c("init", "n_est", "phi_est", "BIC",
"Frac_genome_subclonal", "Frac_CNA_subclonal", "loglik"))))
counter <- 1
compNames <- rep(NA, nrow(loglik))
mainName <- rep(NA, length(normal) * length(ploidy))
#### restart for purity and ploidy values ####
for (n in normal){
for (p in ploidy){
if (n == 0.95 & p != 2) {
next
}
logR <- as.data.frame(lapply(tumour_copy, function(x) { x$log2 })) # NEED TO EXCLUDE CHR X #
param <- getDefaultParameters(logR[valid & chrInd, , drop=F], maxCN = maxCN, includeHOMD = includeHOMD,
ct.sc=scStates, ploidy = floor(p), e=txnE, e.same = 50, strength=txnStrength)
param$phi_0 <- rep(p, numSamples)
param$n_0 <- rep(n, numSamples)
############################################
######## CUSTOM PARAMETER SETTINGS #########
############################################
# 0.1x cfDNA #
if (is.null(lambda)){
logR.var <- 1 / ((apply(logR, 2, sd, na.rm = TRUE) / sqrt(length(param$ct))) ^ 2)
param$lambda <- rep(logR.var, length(param$ct))
param$lambda[param$ct %in% c(2)] <- logR.var
param$lambda[param$ct %in% c(1,3)] <- logR.var
param$lambda[param$ct >= 4] <- logR.var / 5
param$lambda[param$ct == max(param$ct)] <- logR.var / 15
param$lambda[param$ct.sc.status] <- logR.var / 10
}else{
param$lambda[param$ct %in% c(2)] <- lambda[2]
param$lambda[param$ct %in% c(1)] <- lambda[1]
param$lambda[param$ct %in% c(3)] <- lambda[3]
param$lambda[param$ct >= 4] <- lambda[4]
param$lambda[param$ct == max(param$ct)] <- lambda[2] / 15
param$lambda[param$ct.sc.status] <- lambda[2] / 10
}
param$alphaLambda <- rep(lambdaScaleHyperParam, length(param$ct))
# 1x bulk tumors #
#param$lambda[param$ct %in% c(2)] <- 2000
#param$lambda[param$ct %in% c(1)] <- 1750
#param$lambda[param$ct %in% c(3)] <- 1750
#param$lambda[param$ct >= 4] <- 1500
#param$lambda[param$ct == max(param$ct)] <- 1000 / 25
#param$lambda[param$ct.sc.status] <- 1000 / 75
#param$alphaLambda[param$ct.sc.status] <- 4
#param$alphaLambda[param$ct %in% c(1,3)] <- 5
#param$alphaLambda[param$ct %in% c(2)] <- 5
#param$alphaLambda[param$ct == max(param$ct)] <- 4
#############################################
################ RUN HMM ####################
#############################################
hmmResults.cor <- runHMMsegment(x=tumour_copy, validInd=valid, dataType = "log2",
param = param, chrTrain = chrTrain, maxiter = 50,
estimateNormal = estimateNormal, estimatePloidy = estimatePloidy,
estimateSubclone = estimateScPrevalence, verbose = TRUE)
for (s in 1:numSamples){
iter <- hmmResults.cor$results$iter
id <- names(hmmResults.cor$cna)[s]
## convert full diploid solution (of chrs to train) to have 1.0 normal or 0.0 purity
## check if there is an altered segment that has at least a minimum # of bins
segsS <- hmmResults.cor$results$segs[[s]]
segsS <- segsS[segsS$chr %in% chrTrain, ]
segAltInd <- which(segsS$event != "NEUT")
maxBinLength = -Inf
if (sum(segAltInd) > 0){
maxInd <- which.max(segsS$end[segAltInd] - segsS$start[segAltInd] + 1)
maxSegRD <- GenomicRanges::GRanges(seqnames=segsS$chr[segAltInd[maxInd]],
ranges=IRanges::IRanges(start=segsS$start[segAltInd[maxInd]], end=segsS$end[segAltInd[maxInd]]))
hits <- IRanges::findOverlaps(query=maxSegRD, subject=tumour_copy[[s]][valid, ])
maxBinLength <- length(S4Vectors::subjectHits(hits))
}
## check if there are proportion of total bins altered
# if segment size smaller than minSegmentBins, but altFrac > altFracThreshold, then still estimate TF
cnaS <- hmmResults.cor$cna[[s]]
altInd <- cnaS[cnaS$chr %in% chrTrain, "event"] == "NEUT"
altFrac <- sum(!altInd, na.rm=TRUE) / length(altInd)
if ((maxBinLength <= minSegmentBins) & (altFrac <= altFracThreshold)){
hmmResults.cor$results$n[s, iter] <- 1.0
}
# correct integer copy number based on estimated purity and ploidy
correctedResults <- correctIntegerCN(cn = hmmResults.cor$cna[[s]],
segs = hmmResults.cor$results$segs[[s]],
purity = 1 - hmmResults.cor$results$n[s, iter], ploidy = hmmResults.cor$results$phi[s, iter],
cellPrev = 1 - hmmResults.cor$results$sp[s, iter],
maxCNtoCorrect.autosomes = maxCN, maxCNtoCorrect.X = maxCN, minPurityToCorrect = minTumFracToCorrect,
gender = gender[[s]], chrs = chrs, correctHOMD = includeHOMD)
hmmResults.cor$results$segs[[s]] <- correctedResults$segs
hmmResults.cor$cna[[s]] <- correctedResults$cn
## plot solution ##
outPlotFile <- paste0(out_file_dir, "/", id, "_genomeWide_", "n", n, "-p", p)
mainName[counter] <- paste0(id, ", n: ", n, ", p: ", p, ", log likelihood: ", signif(hmmResults.cor$results$loglik[hmmResults.cor$results$iter], digits = 4))
plotGWSolution(hmmResults.cor=hmmResults.cor, s=s, outPlotFile=outPlotFile, plotFileType=plotFileType,
logR.column = "logR", call.column = "Corrected_Call",
plotYLim=plotYLim, estimateScPrevalence=estimateScPrevalence,
seqinfo=NULL, main=mainName[counter])
}
iter <- hmmResults.cor$results$iter
results[[counter]] <- hmmResults.cor
loglik[counter, "loglik"] <- signif(hmmResults.cor$results$loglik[iter], digits = 4)
subClonalBinCount <- unlist(lapply(hmmResults.cor$cna, function(x){ sum(x$subclone.status) }))
fracGenomeSub <- subClonalBinCount / unlist(lapply(hmmResults.cor$cna, function(x){ nrow(x) }))
fracAltSub <- subClonalBinCount / unlist(lapply(hmmResults.cor$cna, function(x){ sum(x$copy.number != 2) }))
fracAltSub <- lapply(fracAltSub, function(x){if (is.na(x)){0}else{x}})
loglik[counter, "Frac_genome_subclonal"] <- paste0(signif(fracGenomeSub, digits=2), collapse=",")
loglik[counter, "Frac_CNA_subclonal"] <- paste0(signif(as.numeric(fracAltSub), digits=2), collapse=",")
loglik[counter, "init"] <- paste0("n", n, "-p", p)
loglik[counter, "n_est"] <- paste(signif(hmmResults.cor$results$n[, iter], digits = 2), collapse = ",")
loglik[counter, "phi_est"] <- paste(signif(hmmResults.cor$results$phi[, iter], digits = 4), collapse = ",")
counter <- counter + 1
}
}
## get total time for all solutions ##
elapsedTimeSolutions <- proc.time() - ptmTotalSolutions
message("Total ULP-WGS HMM Runtime: ", format(elapsedTimeSolutions[3] / 60, digits = 2), " min.")
### SAVE R IMAGE ###
save.image(outImage)
#save(tumour_copy, results, loglik, file=paste0(outDir,"/",id,".RData"))
### SELECT SOLUTION WITH LARGEST LIKELIHOOD ###
loglik <- loglik[!is.na(loglik$init), ]
if (estimateScPrevalence){ ## sort but excluding solutions with too large % subclonal
fracInd <- which(loglik[, "Frac_CNA_subclonal"] <= maxFracCNASubclone &
loglik[, "Frac_genome_subclonal"] <= maxFracGenomeSubclone)
if (length(fracInd) > 0){ ## if there is a solution satisfying % subclonal
ind <- fracInd[order(loglik[fracInd, "loglik"], decreasing=TRUE)]
}else{ # otherwise just take largest likelihood
ind <- order(as.numeric(loglik[, "loglik"]), decreasing=TRUE)
}
}else{#sort by likelihood only
ind <- order(as.numeric(loglik[, "loglik"]), decreasing=TRUE)
}
#new loop by order of solutions (ind)
outPlotFile <- paste0(out_file_dir, "/", id, "_genomeWide_all_sols")
for(i in 1:length(ind)) {
hmmResults.cor <- results[[ind[i]]]
turnDevOff <- FALSE
turnDevOn <- FALSE
if (i == 1){
turnDevOn <- TRUE
}
if (i == length(ind)){
turnDevOff <- TRUE
}
plotGWSolution(hmmResults.cor, s=s, outPlotFile=outPlotFile, plotFileType="pdf",
logR.column = "logR", call.column = "Corrected_Call",
plotYLim=plotYLim, estimateScPrevalence=estimateScPrevalence,
seqinfo = NULL,
turnDevOn = turnDevOn, turnDevOff = turnDevOff, main=mainName[ind[i]])
}
hmmResults.cor <- results[[ind[1]]]
hmmResults.cor$results$loglik <- as.data.frame(loglik)
hmmResults.cor$results$gender <- gender
hmmResults.cor$results$coverage <- coverage
outputHMM(cna = hmmResults.cor$cna, segs = hmmResults.cor$results$segs,
results = hmmResults.cor$results, patientID = id, outDir=out_file_dir)
outFile <- paste0(out_file_dir, "/",id, ".params.txt")
outputParametersToFile(hmmResults=hmmResults.cor, file = outFile)
plotSolutions(hmmResults.cor, tumour_copy, chrs, outDir=out_file_dir, numSamples=numSamples,
logR.column = "logR", call.column = "Corrected_Call",
plotFileType=plotFileType, plotYLim=plotYLim, seqinfo = NULL,
estimateScPrevalence=estimateScPrevalence, maxCN=maxCN)
job_report=build_job_report(
job_id=job,
executor_id=executor_id,
exec_code=list(),
task_id=task_id,
input_args = argg,
out_file_dir=out_file_dir,
out_files=list(
param=outFile,
plot_solutions=outPlotFile
)
)
return(job_report)
}
#' IchorCNA implementation for Capture Panel Data
#'
#'
#' @param cnrs [REQUIRED] Path to tumour CNR file.
#' @param normal [OPTIONAL] Path to normal samples. Default c(0.5,0.6,0.7,0.8,0.9)
#' @param ploidy [OPTIONAL] Initial tumour ploidy; can be more than one value if additional ploidy initializations are desired. Default: 2
#' @param lambda [OPTIONAL] Initial Student's t precision; must contain 4 values (e.g. c(1500,1500,1500,1500)); if not provided then will automatically use based on variance of data
#' @param scStates [OPTIONAL] Subclonal states to consider. Default NULL
#' @param output_name [OPTIONAL] Name for the output. If not given the name of the first tumour sample of the samples will be used.
#' @param lambdaScaleHyperParam [OPTIONAL] Hyperparameter (scale) for Gamma prior on Student's-t precision. Default 3
#' @param maxCN [OPTIONAL] Total clonal states. Default 7.
#' @param estimateNormal [OPTIONAL] Estimate normal. Default TRUE.
#' @param estimateScPrevalence [OPTIONAL] Estimate subclonal prevalence. Default TRUE.
#' @param estimatePloidy [OPTIONAL] Estimate tumour ploidy. Default TRUE.
#' @param maxFracGenomeSubclone [OPTIONAL] Exclude solutions with subclonal genome fraction greater than this value. Default 0.5
#' @param maxFracCNASubclone [OPTIONAL] Exclude solutions with fraction of subclonal events greater than this value. Default 0.7
#' @param minSegmentBins [OPTIONAL] Minimum number of bins for largest segment threshold required to estimate tumor fraction; if below this threshold, then will be assigned zero tumor fraction
#' @param altFracThreshold [OPTIONAL] Minimum proportion of bins altered required to estimate tumor fraction; if below this threshold, then will be assigned zero tumor fraction. Default: [0.05]
#' @param includeHOMD [OPTIONAL] If FALSE, then exclude HOMD state. Useful when using large bins (e.g. 1Mb). Default FALSE.
#' @param txnE [OPTIONAL] Self-transition probability. Increase to decrease number of segments. Default: [0.9999999]
#' @param txnStrength [OPTIONAL] Transition pseudo-counts. Exponent should be the same as the number of decimal places of --txnE. Default: [1e+07]
#' @param plotFileType [OPTIONAL] File format for output plots. Default pdf
#' @param plotYLim [OPTIONAL] ylim to use for chromosome plots. Default: [c(-2,2)]
#' @param output_dir [OPTIONAL] Path to the output directory.
#' @param threads [OPTIONAL] Number of threads to split the work. Default 4
#' @param ram [OPTIONAL] RAM memory to asing to each thread. Default 4
#' @param verbose [OPTIONAL] Enables progress messages. Default False.
#' @param mode [REQUIRED] Where to parallelize. Default local. Options ["local","batch"]
#' @param executor_id Task EXECUTOR ID. Default "recalCovariates"
#' @param task_name Task name. Default "recalCovariates"
#' @param time [OPTIONAL] If batch mode. Max run time per job. Default "48:0:0"
#' @param update_time [OPTIONAL] If batch mode. Job update time in seconds. Default 60.
#' @param wait [OPTIONAL] If batch mode wait for batch to finish. Default FALSE
#' @param hold [OPTIONAL] HOld job until job is finished. Job ID.
#' @export
parallel_sample_ichor_capture=function(
cnrs="",
patient_id="",
normal=0.5,
ploidy=2,
maxCN=7,
includeHOMD=FALSE,
scStates=c(1,3),
txnE=0.9999999,
txnStrength=1e7,
output_dir=".",
min_cov=-15,
lambda=NULL,
coverage=NULL,
minSegmentBins=50,
minTumFracToCorrect=0.01,
chrs=c(1:22,"X"),
chrTrain=seq(1:22),
gender="male",
maxFracCNASubclone=0.7,
maxFracGenomeSubclone=0.5,
lambdaScaleHyperParam=3,
altFracThreshold=0.05,
estimateNormal=TRUE,
estimatePloidy=TRUE,
estimateScPrevalence=TRUE,
plotYLim=c(-2,2),
plotFileType="pdf",
verbose=FALSE,
batch_config=build_default_preprocess_config(),
threads=4,ram=4,mode="local",
executor_id=make_unique_id("parSamplehybridIchorCNA"),
task_name="parSamplehybridIchorCNA",time="48:0:0",
update_time=60,
wait=FALSE,hold=NULL
){
argg <- as.list(environment())
task_id=make_unique_id(task_name)
out_file_dir=set_dir(dir=output_dir,name=patient_id)
job=build_job(executor_id=executor_id,task_id=task_id)
jobs_report=build_job_report(
job_id=job,
executor_id=executor_id,
exec_code=list(),
task_id=task_id,
input_args=argg,
out_file_dir=out_file_dir,
out_files=list(
)
)
cnr_list=cnrs
names(cnr_list)=Vectorize(get_file_name)(cnrs)
if(mode=="local"){
jobs_report[["steps"]][["par_sample_ichor_capture"]]<-
parallel::mclapply(cnr_list,FUN=function(cnr){
job_report <-ichor_capture(
cnr=cnr,
normal=normal,
ploidy=ploidy,
maxCN=maxCN,
includeHOMD=includeHOMD,
scStates=scStates,
txnE=txnE,
txnStrength=txnStrength,
output_name=get_file_name(cnr),
output_dir=out_file_dir,
min_cov=min_cov,
lambda=lambda,
coverage=coverage,
minSegmentBins=minSegmentBins,
minTumFracToCorrect=minTumFracToCorrect,
chrs=chrs,
chrTrain=chrTrain,
gender=gender,
maxFracCNASubclone=maxFracCNASubclone,
maxFracGenomeSubclone= maxFracGenomeSubclone,
lambdaScaleHyperParam=lambdaScaleHyperParam,
altFracThreshold=altFracThreshold,
estimateNormal=estimateNormal,
estimatePloidy=estimatePloidy,
estimateScPrevalence=estimateScPrevalence,
plotYLim=plotYLim,
plotFileType=plotFileType,
verbose=verbose,
batch_config=batch_config,
threads=threads,
ram=ram,mode=mode,
executor_id=task_id,
time=time,
hold=hold
)
},mc.cores=threads)
}else if(mode=="batch"){
rdata_file=paste0(out_file_dir,"/",job,".samples.RData")
output_dir=out_file_dir
save(cnr_list,normal,
ploidy,
maxCN,
includeHOMD,
scStates,
txnE,
txnStrength,
min_cov,
lambda,
output_dir,
coverage,
minSegmentBins,
minTumFracToCorrect,
chrs,
chrTrain,
gender,
maxFracCNASubclone,
maxFracGenomeSubclone,
lambdaScaleHyperParam,
altFracThreshold,
estimateNormal,
estimatePloidy,
estimateScPrevalence,
plotYLim,
plotFileType,verbose,file = rdata_file)
exec_code=paste0("Rscript -e \"ULPwgs::ichor_capture(rdata=\\\"",
rdata_file,"\\\",selected=$SGE_TASK_ID)\"")
out_file_dir2=set_dir(dir=out_file_dir,name="batch")
batch_code=build_job_exec(job=job,time=time,ram=ram,
threads=1,output_dir=out_file_dir2,
hold=hold,array=length(cnr_list))
exec_code=paste0("echo '. $HOME/.bashrc;",batch_config,";",exec_code,"'|",batch_code)
if(verbose){
print_verbose(job=job,arg=argg,exec_code=exec_code)
}
error=execute_job(exec_code=exec_code)
if(error!=0){
stop("cnvkit failed to run due to unknown error.
Check std error for more information.")
}
jobs_report[["steps"]][["par_sample_ichor_capture"]]<- build_job_report(
job_id=job,
executor_id=executor_id,
exec_code=exec_code,
task_id=task_id,
input_args=argg,
out_file_dir=out_file_dir,
out_files=list(
param=paste0(out_file_dir,"/ichor_capture/",names(cnr_list),"/",names(cnr_list),".params.txt"),
plot=paste0(out_file_dir,"/ichor_capture/",names(cnr_list),"/",names(cnr_list),"_genomeWide_all_sols.",plotFileType)
)
)
}
return(jobs_report)
}
#' Generate a ichorCNA for WGSd
#'
#' This function generates a WIG file.
#'
#'
#' @param bam Path to the BAM file .
#' @param bin_samtools Path to readCounter executable. Default path tools/samtools/samtools.
#' @param bin_readcount Path to readCounter executable. Default path tools/hmmcopy_utils/bin/readCounter.
#' @param output_dir Path to the output directory.
#' @param chrs String of chromosomes to include. c()
#' @param win Size of non overlaping windows. Default 500000.
#' @param format Output format [wig/seg] . Default wig
#' @param threads Number of threads to use. Default 3
#' @param verbose Enables progress messages. Default False.
#' @export
ichor_wgs=function(
bin_ichorcna=build_default_tool_binary_list()$bin_ichor,
wig=NULL,
patient_id=NULL,
normal="\"c(0.5,0.6,0.7,0.8,0.9)\"",
ploidy="\"c(2,3)\"",
maxCN=5,
gcWig=build_default_reference_list()$HG19$wgs$ichorcna$gc_500k,
mapWig=build_default_reference_list()$HG19$wgs$ichorcna$map_500k,
centromere=build_default_reference_list()$HG19$wgs$ichorcna$centromere,
normalPanel=build_default_reference_list()$HG19$wgs$ichorcna$pon_500k,
chrs="\"c(1:22, 'X')\"",
chrTrain="\"c(1:22)\"",
estimateNormal=TRUE,
estimatePloidy=TRUE,
estimateScPrevalence=TRUE,
includeHOMD=FALSE,
scStates="\"c(1,3)\"",
txnE=0.9999,
txnStrength=10000,
...
){
options(scipen=999)
run_main=function(
.env
){
.this.env=environment()
append_env(to=.this.env,from=.env)
out_file_dir=set_dir(
out_file_dir,
name=paste0(patient_id,"/ichor_cna/",input_id)
)
set_main(.env=.this.env)
.main$exec_code=paste0(
"Rscript ",bin_ichorcna,
" --id ",ifelse(is.null(patient_id),input_id,patient_id),
" --WIG ",input,
" --ploidy ",ploidy,
" --normal ", normal,
" --maxCN ", maxCN,
" --gcWig ", gcWig,
" --mapWig ", mapWig,
" --centromere ", centromere,
" --normalPanel ", normalPanel,
" --includeHOMD ",ifelse(includeHOMD,"True","False"),
" --chrs ",chrs,
" --chrTrain ",chrTrain,
" --estimateNormal ", ifelse(estimateNormal,"True","False"),
" --estimatePloidy ", ifelse(estimatePloidy,"True","False"),
" --estimateScPrevalence ", ifelse(estimateScPrevalence,"True","False"),
" --scStates ",scStates,
" --txnE ",txnE,
" --txnStrength ", txnStrength,
" --outDir ",.main$out_file_dir
)
run_job(.env=.this.env)
.env$.main <- .main
}
.base.env=environment()
list2env(list(...),envir=.base.env)
set_env_vars(
.env= .base.env,
vars="wig"
)
launch(.env=.base.env)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.