# This example is for users who would like to tune their model
# for a specific dataset where the average number of mutations
# differs significantly from the datasets for which the built-in
# models exist. For those cases it is possible to determine a
# threshold for an existing model (follow test_determine_cutoff.R),
# but a new tune would perform better.
# Load the package from the current source tree so its functions are available.
devtools::load_all()

# Example MAF file shipped with SigMA. mustWork = TRUE makes a missing file
# fail right here with a clear error instead of passing "" to make_matrix().
data_file <- system.file(
  "extdata/examples/tcga_mc3_brca.maf",
  package = "SigMA",
  mustWork = TRUE
)

# Build the mutation matrix from the MAF file and convert it to a data frame.
m <- make_matrix(data_file, file_type = "maf", ref_genome_name = "hg19")
df <- conv_snv_matrix_to_df(m)

# Settings shared by the steps further down in this script.
file_name <- "matrix_96dim_tcga_brca.csv"
tumor_type <- "breast"
catalog_name <- "cosmic_v2_inhouse"

# Write the data frame as a comma-separated file; it is the input of run().
write.table(df, file_name, row.names = FALSE, sep = ",", quote = FALSE)
# First run the built-in model. The value returned by run() is used as a
# file path: it is read back with read.csv() further down.
output_file_built_in <- run(
  file_name,
  data = "tcga_mc3", # if it is a panel try msk
  tumor_type = tumor_type,
  do_mva = TRUE,
  do_assign = TRUE,
  check_msi = TRUE,
  catalog_name = catalog_name
)
# then assuming that we do not have the model let's try to tune
# a new model for our data and we will then compare to the
# built in calculation
# First determine the closest 'data' setting, which
# indicates the sequencing platform, to use
# Remove MSI/POLE samples before tuning. If you have other sources of
# hypermutation (e.g. temozolomide-treated patients), remove those as well.
m <- read.csv(output_file_built_in)
lite <- lite_df(m)
m <- m[
  !(lite$categ %in% c("Signature_msi", "Signature_pole", "MMRD", "POLE")),
]

# Derive the output name from the input name. The dot is escaped and the
# match is anchored at the end so that only the ".csv" extension is replaced
# (an unescaped ".csv" would also match e.g. "_csv" in the middle of a name).
file_name_no_msi <- sub("\\.csv$", "_nomsi.csv", file_name)

# Keep only the first 97 columns of the SigMA output.
# NOTE(review): presumably these are the columns of the original input
# matrix (sample identifier + 96 channels) -- confirm.
write.table(
  m[, 1:97],
  file_name_no_msi,
  row.names = FALSE,
  sep = ",",
  quote = FALSE
)
# The value returned here is reused as the `data` argument of the simulation
# and tuning steps below.
data <- find_data_setting(
  input_file = file_name_no_msi,
  tumor_type = tumor_type,
  # FALSE since we removed these samples above; otherwise this has to be TRUE
  remove_msi_pole = FALSE,
  catalog_name = catalog_name
)
# if the best data option for a panel turns out to be exome sequencing
# you may want to check the germline contamination in your mutation calls
message(paste0("Best data option is ", data))
# Once the data setting is identified, it allows us to get an estimate of
# the total SNV counts in Sig3+ and Sig3- samples identified at first order.
# To tune a new model that fits the SNV count in our dataset, we first
# simulate a new dataset from WGS data, for which Sig3 is known from WGS
# analysis and is more reliable.
simul_file <- quick_simulation(
  file_name_no_msi,
  tumor_type = tumor_type,
  data = data,
  run_SigMA = TRUE,
  # FALSE since we removed these samples above; otherwise this has to be TRUE
  remove_msi_pole = FALSE,
  catalog_name = catalog_name
)
message("tuning")
message(simul_file)
# Using the simulations, tune a new Gradient Boosting Machine.
# NOTE(review): the tuned model is presumably saved to `rda_file`, since
# "test.rda" is loaded right after this call -- confirm.
tune_new_gbm(
  simul_file,
  tumor_type = tumor_type,
  data = data,
  run_SigMA = TRUE,
  rda_file = "test.rda",
  catalog_name = catalog_name
)
message("tuned")
# Load the objects stored in test.rda into the workspace.
# NOTE(review): presumably this provides the `gbm_model` object that is
# passed to add_gbm_model() below -- confirm.
load("test.rda")

# Name of the predictions file derived from the simulation file name.
# NOTE(review): ".csv" is interpreted as a regular expression here (the dot
# matches any character). It is left unchanged because the derived name has
# to agree with the file written during tuning -- confirm that naming before
# tightening the pattern.
simul_file_output <- gsub(
  pattern = ".csv",
  replacement = "_predictions.csv",
  x = simul_file
)
df_predict <- read.csv(simul_file_output)
# Get thresholds on the predicted probability for the given false positive
# rates (0.1 and 0.05).
limits_fpr <- c(0.1, 0.05)
# You can alternatively use 'sen' to select on a specific sensitivity value.
cut_var <- "fpr"
thresh <- get_threshold(
  df_predict,
  limits_fpr,
  var = "prob",
  cut_var = cut_var,
  signal = "is_sig3"
)
# Presumably the cutoffs come back in the same order as limits_fpr, i.e. the
# first is for FPR < 0.1 and the second (stricter) for FPR < 0.05 -- confirm.
cutoff <- thresh$cutoff[[1]]
cutoff_strict <- thresh$cutoff[[2]]
# Register the new GBM model under the name 'example_gbm', together with the
# two cutoffs determined above, in the system files so that they can be used
# in the future for new data sequenced with the same sequencing platform.
add_gbm_model(
  "example_gbm",
  tumor_type = tumor_type,
  gbm_model = gbm_model,
  cutoff = cutoff,
  cutoff_strict = cutoff_strict
)
# Test the following command and replace the bed file with that of your
# libraries. mustWork = TRUE makes a missing example file fail here with a
# clear error instead of passing "" to get_trinuc_norm().
bed_file <- system.file(
  "extdata/examples/example_small_bedfile.bed",
  package = "SigMA",
  mustWork = TRUE
)
norm96 <- get_trinuc_norm(bed_file)
# Then imagine we received a new dataset, which in our example is the same
# dataset we analyzed earlier with the built-in model. This time run() is
# pointed at the newly added 'example_gbm' model via `data` and `custom`.
output_new_tune <- run(
  file_name,
  data = "example_gbm",
  tumor_type = tumor_type,
  catalog_name = catalog_name,
  do_mva = TRUE,
  do_assign = TRUE,
  check_msi = TRUE,
  custom = TRUE,
  # replace with the norm96 obtained from the bed file that defines your
  # library coverage
  norm96 = norm96
)
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.