#' doMutationAnalysis
#'
#' For each requested gene, calculates the number and percentage of patients
#' in the cohort carrying either a loss of function mutation (nonsense,
#' frame shift or splice site) or any protein coding change (the loss of
#' function classes plus missense).
#'
#' The function reads the \code{tcga_mutation_data} table through \code{con}.
#' That table must provide the columns \code{gene_id}, \code{patient_id},
#' \code{Variant_Classification} and \code{Protein_Change}. The cohort size
#' used as the denominator is the number of distinct \code{patient_id}
#' values in the whole table, not only those for the requested genes.
#'
#' As a side effect two tables are (over)written in the database behind
#' \code{con}: \code{mutation_analysis_results} (the returned data frame) and
#' \code{mutation_analysis_data} (the per-mutation rows for the requested
#' genes with the added \code{is_lof} and \code{is_protein_coding} flags).
#'
#' @param con A \code{SQLiteConnection} object
#' @param genes A vector of human ENSEMBL gene ids
#' @return a data frame with one row per requested gene and the columns
#'   \code{gene_id}, \code{N_lof}, \code{pct_lof}, \code{N_protein_coding},
#'   \code{pct_protein_coding}, \code{patient_ids} and \code{mutations}.
#'   Genes without any mutation record get zero counts and empty strings.
#' @export
doMutationAnalysis <- function(con, genes) {

  # Query through the connection we were given. Opening a second connection
  # from con@dbname leaks it and does not see the data of in-memory databases.
  mutation_tbl <- dplyr::tbl(con, "tcga_mutation_data")

  # get the data; `!!` forces `genes` to be evaluated locally rather than
  # being looked up as a column when the filter is translated to SQL
  mutdata <- mutation_tbl %>%
    dplyr::filter(gene_id %in% !!genes) %>%
    dplyr::collect()

  # process the data: flag each mutation row
  mutdata_processed <- mutdata %>%
    dplyr::mutate(
      # only LOF mutations
      is_lof = grepl('Nonsense|Frame_Shift|Splice', Variant_Classification),
      # any protein coding mutation
      is_protein_coding = grepl('Nonsense|Frame_Shift|Splice|Missense',
                                Variant_Classification)
    ) %>%
    as.data.frame()

  # how many patients in total (denominator for the percentages)
  n_patients <- mutation_tbl %>%
    dplyr::select(patient_id) %>%
    dplyr::distinct() %>%
    dplyr::collect() %>%
    nrow()

  # count up number of mutated samples per gene; distinct patients are
  # counted so that a patient with several qualifying mutations in the same
  # gene contributes once
  output_df <- mutdata_processed %>%
    dplyr::group_by(gene_id) %>%
    dplyr::summarise(
      N_lof = dplyr::n_distinct(patient_id[is_lof]),
      pct_lof = round(N_lof * 100 / n_patients, 2),
      N_protein_coding = dplyr::n_distinct(patient_id[is_protein_coding]),
      pct_protein_coding = round(N_protein_coding * 100 / n_patients, 2),
      patient_ids = paste(patient_id, collapse = ';'),
      mutations = paste(Protein_Change, collapse = ';')
    ) %>%
    dplyr::ungroup()

  # add in genes with no mutations; every column is sized explicitly because
  # data.frame() refuses to recycle a scalar against a zero-length gene_id,
  # which is what we get when all requested genes have mutations
  missing_genes <- setdiff(genes, output_df$gene_id)
  n_missing <- length(missing_genes)
  no_mut_genes <- data.frame(
    gene_id = missing_genes,
    N_lof = rep(0, n_missing),
    pct_lof = rep(0, n_missing),
    N_protein_coding = rep(0, n_missing),
    pct_protein_coding = rep(0, n_missing),
    patient_ids = rep('', n_missing),
    mutations = rep('', n_missing),
    stringsAsFactors = FALSE
  )

  # combine output
  output_df <- dplyr::bind_rows(output_df, no_mut_genes) %>%
    as.data.frame()

  # put data into sqlite database
  DBI::dbWriteTable(con, "mutation_analysis_results", output_df,
                    overwrite = TRUE)
  DBI::dbWriteTable(con, 'mutation_analysis_data', mutdata_processed,
                    overwrite = TRUE)

  message('Finished')

  output_df
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.