# R/mutation_extraction_mapping_wrap.R

# Defines functions extraction_annotation_pos

# Documented in extraction_annotation_pos

# 
# library(data.table)
# library(dplyr)
# library(magrittr)

#' Extract and annotate the mutations of one cancer type
#'
#' This function generates two files as its final products: first, the mutations that result in a
#' protein consequence, annotated with their protein positions; second, the mutations without a
#' protein consequence.
#' 
#' @param mutation_df Data frame of mc3 somatic mutations. 
#' @param cancer_type An identifier of cancer type, e.g. ACC,STAD,BRCA. 
#' @param cancer_barcode A list of barcodes the user wants to process, matching the last column of the mutation file.
#' @param output_dir The directory you would like to have your output files in.
#' @import dplyr magrittr data.table
#' @export
#' @details 
#' @examples 
#' extraction_annotation_pos(mutation_df = sel_example_mutation,
#'                                  cancer_type = "example",
#'                                  cancer_barcode = sel_example_barcode,
#'                                  output_dir = "/Users/ginny/Google Drive/R_GPD/GPD_package_0401/example/")



extraction_annotation_pos <- function(mutation_df,
                                      cancer_type,
                                      cancer_barcode,
                                      output_dir) {
  # Runs three steps in sequence (select the variants for the given barcodes,
  # split them into PC and NPC files, annotate the PC file with positions) and
  # then deletes the two intermediate files. What is left in output_dir are
  # "<cancer_type>_mutation_pc_pos.tsv" and "<cancer_type>_mutation_npc.tsv".
  output_dir <- heal_dir(output_dir)

  # Every file written by this wrapper is named after the cancer type.
  selected_name <- paste0(cancer_type, "_mutation.tsv")
  pc_name <- paste0(cancer_type, "_mutation_pc.tsv")
  npc_name <- paste0(cancer_type, "_mutation_npc.tsv")
  pc_pos_name <- paste0(cancer_type, "_mutation_pc_pos.tsv")

  # Full paths of the two intermediate files (inputs of steps 2 and 3).
  selected_path <- paste0(output_dir, selected_name)
  pc_path <- paste0(output_dir, pc_name)

  # Step 1: keep only the variants belonging to the requested barcodes.
  select_cancer_mc3(
    mc3_df = mutation_df,
    cancer_barcode = cancer_barcode,
    output_dir = output_dir,
    output_name = selected_name
  )
  cat("1/3...variants for this cancer type selected.", "\n")

  # Step 2: split the selected variants into PC and NPC files.
  divide_somatic_to_pc_npc(
    mc3_data_name = selected_path,
    output_dir = output_dir,
    pc_output_name = pc_name,
    npc_output_name = npc_name
  )
  cat("2/3...variants divided to PC and NPC parts.", "\n")

  # Step 3: annotate the PC variants with position information.
  annotate_mc3_pc_position_info(
    pc_data_name = pc_path,
    output_dir = output_dir,
    output_name = pc_pos_name
  )
  cat("3/3...protein positions for PC variants annotated.", "\n")

  # Clean up the intermediates; a file that was never written is skipped.
  for (intermediate in list(selected_path, pc_path)) {
    if (file.exists(intermediate)) {
      file.remove(intermediate)
    }
  }

  cat("Variants extraction and annotation finished!", "\n")
}





#' This function generates three files as the final products. First, each piu mapped, second, lu summarised per gene, third, 
#' npc summarised per gene.
#' 
#' @param piu_df The PIU information provided by the package (e.g. \code{ptm_pfam_combine}, as in the example below).
#' @param pc_data_name The file listing mutations that result in a protein consequence. A file generated by function extraction_annotation_pos.
#' @param npc_data_name The file listing mutations that result in a non-protein consequence. A file generated by function extraction_annotation_pos.
#' @param cancer_barcode Patient barcodes for this cancer type cohort.
#' @param output_dir The directory you would like to have your output files in.
#' @import dplyr magrittr data.table
#' @export
#' @details 
#' @examples 
#' piu_mapping (piu_df = ptm_pfam_combine,
#'                      pc_data_name  =  "/Users/ginny/Google Drive/R_GPD/GPD_package_0401/example/example_mutation_pc_pos.tsv",
#'                      npc_data_name = "/Users/ginny/Google Drive/R_GPD/GPD_package_0401/example/example_mutation_npc.tsv",
#'                      cancer_barcode = sel_example_barcode,
#'                      output_dir = "/Users/ginny/Google Drive/R_GPD/GPD_package_0401/example/")

piu_mapping <- function(piu_df,
                        pc_data_name,
                        npc_data_name,
                        cancer_barcode,
                        output_dir) {
  # Runs the PIU/LU mapping on the PC file and then the NPC mapping on the NPC
  # file, printing after each step whether its count file exists in output_dir.
  output_dir <- heal_dir(output_dir)

  # Fixed names of the count files requested from the two mapping helpers.
  piu_count_file <- "piu_mapping_count.tsv"
  lu_count_file <- "lu_summarising_count.tsv"
  ncu_count_file <- "ncu_summarising_count.tsv"

  # Step 1: PIU mapping and LU summarising.
  mc3_map_uni_piu(
    ptm_pfam_df = piu_df,
    pc_data_name = pc_data_name,
    cancer_barcode = cancer_barcode,
    output_dir = output_dir,
    piu_output_filename = piu_count_file,
    lu_output_filename = lu_count_file
  )

  # The status line depends on whether the PIU count file is present afterwards.
  piu_status <- if (file.exists(paste0(output_dir, piu_count_file))) {
    "1/2...PIU and LU count matrices generated."
  } else {
    "1/2...proceed to NPC mapping."
  }
  cat(piu_status, "\n")

  # Step 2: NPC mapping.
  mc3_map_npc(
    npc_data_name = npc_data_name,
    cancer_barcode = cancer_barcode,
    output_dir = output_dir,
    output_filename = ncu_count_file
  )

  ncu_status <- if (file.exists(paste0(output_dir, ncu_count_file))) {
    "2/2...NCU count matrix generated."
  } else {
    "2/2.. no NCU mapped."
  }
  cat(ncu_status, "\n")

  cat("PIU mapping finished!", "\n")
}
  
  
  
  
  
# ginnyintifa/GPD documentation built on Oct. 23, 2019, 1:52 a.m.