#' Generate segment & log2r intensity files calculated by CONUMEE.
#' @param ref_genes Granges object with subset of genes to use for cnv analysis.
#' @param targets A sample sheet with the required fields for minfi to load files.
#' @param Kc_method Choose which Kc you want to use. Either "curated" or "balanced.
#' The first comes from a list of 94 genes manually selected by experts and an
#' uneven proportion of cancers. The second one comes from all genes and balanced
#' proportion of cancer types. default= balanced
#' @param purity Whether or not you want purity imputed by Rfpurify.
#' Default=TRUE.
#' @param out Parent working directory where you want to have your results.
#' @param folder this directory will be created as the combination of out
#' and subf. if you save the csv file inside this folder it will be used by
#' minfi::read.450k.sheet.
#' @param RGset Whether you want normalised "RGChannelSet" or
#' "RGChannelSetExtended" to be saved or not. path=out
#' @param arraytype Methylation array type
#' @param Sample_Name Samples to be analysed. If is NULL all columns in input
#' file will be used. Accepts numbered index and names. Default=NULL
#' @param intensities dataset with intensities. either dataframe or path to file.
#' For big datasets '.fst' file is recomended.
#' @param anno_file anno file CNV.anno object saved as .rds. default
#' CNV_Germline_GSITIC2_BROAD_SNP6.merged.151117.hg19.CNV.txt
#' @param ctrl_file CNV data object with intensities from the controls group.
#' must be compressed as '.rds' file format, default uses 96 WB samples.
#' Default='WB'
#' @param conumee.folder Parent working directory where you want to have your results.
#' @param seg.folder Subfolder inside results where segment files are saved.
#' The segments are generated with CONUMEE CNV.segment(CNV.detail(CNV.bin(fit)))
#' default = "Segments"
#' @param log2r.folder Subfolder inside results where log2r values are saved.
#' log2r values are the log2 ratio of the intensities(_GRN + _RED channel) between the
#' query and the reference set (control) as returned by CNV.fit function from CONUMEE.
#' default = "log2r"
#' @param probeid name of column with probe ids.
#' @param ncores number of cores to use.
#' @param cn_genes Used to subset genes from the reference set. Needed in order
#' to generate new Kc from a reference set, where cn_genes are previously known
#' genes to be altered in a given cn_state. Otherwise use ref_genes.
#' @return data.frame with metadata from segmentation, genomic ranges, genes
#' and scna for each of the genes
#' @export
#'
#' @examples
#' #data("TrainingSet_Sample_sheet")
#' #ss<-TrainingSet_Sample_sheet[1:56,]
#' t0<-Sys.time()
#' cnv<-cnv.methyl(targets=ss)
#' t1<-Sys.time()
#' cnv
#' print(paste0("elapsed time: ",t1-t0))
run_cnv.methyl<-function(
targets,ref_genes="all",cn_genes=NULL, Kc_method, out="analysis/intermediate/",
RGset=T, purity= NULL, arraytype="450K",folder=NULL, anno_file=NULL, ctrl_file='WB',
Sample_Name=NULL,ncores=NULL, seg.folder = "Segments", log2r.folder = "log2r",
conumee.folder="analysis/CONUMEE/", probeid="probeid",intensities=NULL){
if(!is.null(intensities)){intensity<-intensities}else{
intensity<-pre_process(targets = targets, purity=purity, RGset = RGset,
out=out,folder=folder,arraytype=arraytype)
}
anno<-get_anno(anno=anno_file,arraytype=arraytype)
ss<-data.table::fread(paste0(out,"Sample_Sheet.txt"))
run_conumee(intensities = intensity,anno_file=anno, ctrl_file=ctrl_file,
Sample_Name=Sample_Name,seg.folder = seg.folder,
log2r.folder = log2r.folder,arraytype=arraytype,
conumee.folder=conumee.folder, probeid=probeid)
#test
#devtools::load_all()
#rgSet<-readRDS("/data/CN_SCLC/rgSet.rds")
#ss_all<-ss<-data.table::data.table(as.data.frame(rgSet@colData))
#ss$impute_purity<-purify(rgSet)
# Cluster:
cl<- parallel::makePSOCKcluster(get_ncores(ncores), outfile="")
parallel::clusterEvalQ(cl,{
library("data.table")
#devtools::load_all()
})
doParallel::registerDoParallel(cl)
res<-foreach::foreach(i=1:length(ss$Sample_Name),#isplitIndices(1400,chunks=ncores),
.combine='c',
.multicombine = F,
.inorder=F,
.packages = "data.table",
.export = c("AllGenes","CancerGenes"),
.errorhandling = "pass"
)%dopar%{
#load("R/sysdata.rda")
#source("R/utils.R")
#source("R/Kc_get.R")
#source()
# purify(rgSet)->ss$impute_purity
# ss<-ss[i,]
# ID<-ss$Sample_Name
# arraytype="450K"
# ref_genes="all"
# anno=anno_epic
cna <- Kc_get(ss=ss,ID=ID,ref_genes = ref_genes,arraytype=arraytype,anno_file = anno)
# cna
return(cna)
}
parallel::stopCluster(cl)
return(res)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.