
#Assumes R3.3.1+

#' Generate annotated gene matrix
#'
#' Main function to generate a default gene matrix and return it as a data.table.
#' The first run may take a long time (up to an hour); later runs are expected
#' to be much quicker because intermediate results are cached.
#'
#' The function builds the core matrix with get_core_matrix(), derives the
#' alias-to-symbol table with get_symbol_table() and then adds the annotations
#' with add_annotations(). It does not write any file itself; the reported
#' path is informational only.
#'
#' @param settings a named list with user specified settings. Must contain at
#'     least the elements gencode_version and cache_dir. If it contains
#'     gene_matrix_prefix that value is reported as output location, otherwise
#'     a default file name in the current working directory is used.
#'     Defaults to the global defaults in gm_settings stored in config.R
#' @return data.table with gene matrix
#' @export
generate_genematrix <- function(settings = gm_settings) {

  stopifnot(c("gencode_version", "cache_dir") %in% names(settings))

  # Use the prefix supplied in the settings as-is. If none is supplied, fall
  # back to a default file name located in the current working directory.
  # `[[` is used instead of `$` to avoid partial matching on list names.
  gene_matrix_prefix <- settings[["gene_matrix_prefix"]]
  if (is.null(gene_matrix_prefix)) {
    gene_matrix_prefix <- paste0("genematrix_gencodev", settings$gencode_version, "_v", settings$gmversion)
    gene_matrix_path <- file.path(getwd(), gene_matrix_prefix)
  } else {
    gene_matrix_path <- gene_matrix_prefix
  }

  message("Gene matrix file will be saved as: ", gene_matrix_path)

  # Create core gene matrix by processing and merging gencode, hgnc, and entrez data
  core <- get_core_matrix(settings)

  # Create mapping from alias to official gene symbols
  gene_translation_table <- get_symbol_table(core, settings)

  # Add custom annotation to create final gene matrix
  message("Add annotations...")
  gene_matrix <- add_annotations(core, gene_translation_table, settings)

  # Return the annotated matrix explicitly, as promised by @return
  gene_matrix
}

#' Add annotations to core gene matrix
#'
#' Adds several types of gene annotation to the core matrix: gene-based
#' pvalues, ExAC pLI constraint scores (full and non-psych), OMIM associations,
#' GWAS catalog associations and CommonMind DLPFC differential expression.
#' The OMIM and CommonMind annotations depend on licensed files that have to
#' be placed in the cache directory manually; when such a file is absent the
#' annotation is skipped and a message is emitted instead.
#'
#' @param core core gene matrix to add annotation to
#' @param gene_translation_table list mapping aliases to official symbol used in core gene matrix
#' @param settings a named list with settings; cache_dir and omim_morbidmap_file
#'     are read here, and the list is passed on to every merge_* helper
#' @return returns a data.table with annotated genes
#' @export
add_annotations <- function(core, gene_translation_table, settings) {

  # TRUE only when `file_name` is one usable file name that exists in the
  # cache directory. The length/NA guards keep `if ()` from failing on a
  # missing (NULL) setting.
  # NOTE(review): the location is taken from the user-facing messages below
  # ("placed in the cachedir") - confirm it matches where the merge_* helpers
  # read these files from.
  is_in_cache <- function(file_name) {
    length(file_name) == 1L && !is.na(file_name) &&
      length(settings$cache_dir) == 1L &&
      file.exists(file.path(settings$cache_dir, file_name))
  }

  gene_matrix <- core

  # NOTE(review): a "restrict columns in core matrix" step is announced in the
  # original comments but no code for it is available here.

  # Add genebased pvalues
  gene_matrix <- merge_genebased_pvalues(gene_matrix, settings)

  # Add pli scores from exac
  gene_matrix <- merge_exacpli("fullexac", gene_matrix, gene_translation_table, settings)
  gene_matrix <- merge_exacpli("nonpsychexac", gene_matrix, gene_translation_table, settings)

  # Add omim (licensed file, only when it is available)
  if (is_in_cache(settings$omim_morbidmap_file)) {
    gene_matrix <- merge_omim(gene_matrix, gene_translation_table, settings)
  } else {
    message("Omim file ",settings$omim_morbidmap_file," could not be found. This file requires a license and needs to be manually placed in the cachedir to be used for annotation.")
  }

  # Add gwas catalog
  gene_matrix <- merge_gwascatalog(gene_matrix, settings)

  # Add brain expression (licensed file, only when it is available).
  # `commonmind_DLPFC_diffexpr_file` is not defined in this function; it is
  # resolved from the enclosing environment exactly as in the original code.
  if (is_in_cache(commonmind_DLPFC_diffexpr_file)) {
    gene_matrix <- merge_commonmindDLPFC(gene_matrix, settings)
  } else {
    message("Common mind DLPFC diff expression file ",commonmind_DLPFC_diffexpr_file," could not be found. This file requires a license and needs to be manually placed in the cachedir to be used for annotation.")
  }

  # NOTE(review): a "final renames" step is announced in the original comments
  # but no code for it is available here; column names are returned as
  # produced by the merge_* helpers.

  gene_matrix
}


#' Save customized gene matrix based on settings
#'
#' Publish gene matrix in .csv and .Rdata format.
#' In the code visible here the gene matrix columns are ordered according to
#' the column specification read from output_col_file, the selected columns
#' are exported with write.foreign() as <prefix>_sas.csv plus <prefix>.sas,
#' and the settings are logged to <prefix>.settings.
#' NOTE(review): this block looks truncated. The writers for the tab
#' separated, R binary and Excel .csv outputs, the end of the for loop and the
#' closing brace of the function are not present - restore them from the
#' upstream source before relying on this function.
#'
#' @param gene_matrix a data.table with gene annotation to save
#' @param gene_matrix_prefix prefix for the output file names.
#'     NOTE(review): the visible code overwrites this argument unconditionally
#'     with the default prefix - presumably this was guarded by an is.null()
#'     check; confirm.
#' @param settings a list including which columns to save and destination path.
#'     The elements gencode_version and gmversion are used to build the default
#'     prefix, and all elements are written to the .settings log.
#' @param utility_version when FALSE the suffix "_extended" is appended to the
#'     default prefix. NOTE(review): the subset to columns flagged "Y" further
#'     down is presumably meant to apply only when this is TRUE - confirm.
#' @return None
#' @export
publish_genematrix <- function(gene_matrix, gene_matrix_prefix=NULL,settings=gm_settings,utility_version=T) {


    # The extended (non-utility) version gets its own file name suffix
    suffix <- ""
    if(!utility_version) suffix <-  "_extended"
    #If no gene_matrix_prefix is supplied a default filename is saved in the current working directory.
    gene_matrix_prefix <- paste0("genematrix_gencodev", settings$gencode_version, "_v", settings$gmversion,suffix)
    # NOTE(review): gene_matrix_path is not used again in the visible code
    gene_matrix_path <- file.path(getwd(), gene_matrix_prefix)

  #Set column order and rename column names
  # output_col_file is not defined in this function - presumably a package
  # level variable (e.g. from config.R); verify. The table read here is used
  # below through its columns label and utility_version.
  output_cols <- fread(output_col_file)

  # Row(s) of the column specification whose label starts with the
  # gene-based p-value prefix; this row acts as a placeholder.
  # assumes exactly one such row, located neither first nor last - TODO confirm
  genep_lab_prefix <- "gene_based_P_"
  genep_row_idx <- which(startsWith(output_cols$label,genep_lab_prefix))

  # Replace the placeholder row by one row per gene-based p-value column that
  # is actually present in gene_matrix, keeping the rows before and after it.
  genep_columns <- names(gene_matrix)[startsWith(names(gene_matrix),genep_lab_prefix)]
  out_df <- output_cols[1:(genep_row_idx-1),]
    out_df <- rbind(out_df,data.table(label=genep_columns,utility_version=output_cols[genep_row_idx,utility_version],description="gene-based pvalue",source="magma"))
  out_df <- rbind(out_df,output_cols[(genep_row_idx+1):nrow(output_cols),])

  # Reorder the columns of gene_matrix to follow the specification
  setcolorder(gene_matrix, out_df$label)
  gm <- gene_matrix
    # Keep only the columns flagged "Y" in utility_version
    out_df <- subset(out_df, utility_version=="Y")
    gm <- gene_matrix[,(out_df$label),with=F]


  # Tab separated for scripting
  # NOTE(review): no tab separated writer is present in the visible code
  #SAS conversion
  write.foreign(df=gm, datafile=paste0(gene_matrix_prefix,"_sas.csv"), codefile=paste0(gene_matrix_prefix,".sas"), package="SAS")
  # R binary format
  # NOTE(review): no R binary writer is present in the visible code

  gene_matrix_excel <- gm

  # Builds an Excel HYPERLINK formula string for every column.
  # NOTE(review): excel_link is never stored or used and the loop is not
  # closed in the visible code - the remainder of the loop body seems missing.
  for(col in names(gene_matrix_excel)){
      excel_link <- paste0('=HYPERLINK("',gm[[col]],'","',col,'")')

  #.csv for easy import in Excel (add hyperlink)
  # NOTE(review): no .csv writer is present in the visible code

  # Log the publication date together with all settings as a tab separated
  # label/value table in <prefix>.settings
  labels <- c("publication_date", names(settings))
  values <- c(date(),unlist(settings))
  log_df <- data.table(label=labels,value=values)
  write.table(log_df,file=paste0(gene_matrix_prefix,".settings"),sep="\t",row.names = F,col.names=T)

  message("Gene matrix saved")
# svenstringer/genematrix documentation built on May 30, 2019, 8:48 p.m.