R/rhl30_model.R

Defines functions get_rhl30_model_coef_df get_rhl30_scores_df

Documented in get_rhl30_model_coef_df get_rhl30_scores_df

#' Get the RHL30 Model Coefficients
#'
#' Return the RHL30 model coefficients as a data frame
#'
#' @export
#' @return Data frame of the RHL30 coefficients
get_rhl30_model_coef_df <- function() {
  structure(
    list(
      gene_name = 
        c(
          "ACTB", "ALAS1", "CLTC", "GAPDH", "GUSB", "PGK1", "POLR2A", "RPL19", 
          "RPLP0", "SDHA", "TBP", "TUBB", "ABCA3", "ABCG1", "APOE", "BACH2", 
          "CCL20", "CR2", "CSF1", "CX3CL1", "GCS", "IGSF3", "IL13RA1", "IL3RA", 
          "LGMN", "NGK2D", "RNF144B", "SDC4", "SOD2", "TNFSF9"
        ), 
      refseq_mrna_id = 
        c(
          "NM_001101.2", "NM_000688.4", "NM_004859.2", "NM_002046.3", 
          "NM_000181.1", "NM_000291.2", "NM_000937.2", "NM_000981.3", 
          "NM_001002.3", "NM_004168.1", "NM_001172085.1", "NM_178014.2", 
          "NM_001089.2", "NM_207174.1", "NM_000041.2", "NM_021813.2", 
          "NM_004591.1", "NM_001006658.1", "NM_000757.4", "NM_002996.3", 
          "NM_001498.2", "NM_001542.2", "NM_001560.2", "NM_002183.2", 
          "NM_001008530.2", "NM_024518.1", "NM_182757.2", "NM_002999.2", 
          "NM_000636.2", "NM_003811.3"
        ), 
      gene_type = 
        c(
          "housekeeper", "housekeeper", "housekeeper", "housekeeper", 
          "housekeeper", "housekeeper", "housekeeper", "housekeeper", 
          "housekeeper", "housekeeper", "housekeeper", "housekeeper", "model", 
          "model", "model", "model", "model", "model", "model", "model", 
          "model", "model", "model", "model", "model", "model", "model",
          "model", "model", "model"
        ), 
      coefficient = 
        c(
          NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0.118514252096935, 
          0.00678524312415628, 0.0977608756988918, -0.0970736660718595, 
          0.0601204445696382, -0.0301980923909464, 0.282480739676071, 
          0.0651576842789112, 0.152117325885877, 0.0543152852731469, 
          0.0644693169147704, 0.0458709339799457, 0.0748446274704466,
          -0.000636062087683578, 0.104732223627018, 0.12175967271576, 
          0.0484320148407741, 0.213505755430869
        )
      ), 
    row.names = c(NA, -30L), 
    class = c("spec_tbl_df", "tbl_df", "tbl", "data.frame")
  )
}

#' Generate the RHL30 Predictor Scores
#'
#' `get_rhl30_scores_df` returns RHL30 scores.
#'
#' @param exprs_mat Normalized expression matrix generated by 
#'   `normalize_exprs_mat`
#' @param rhl30_model_df Data frame containg details of the RHL30 model. This
#'   should be the data frame from the `get_rhl30_model_coef_df` function.
#' @return A two column data.frame with sample_id and RHL30 scores
#' @export
get_rhl30_scores_df <- function(exprs_mat, rhl30_model_df) {

  if (! is.matrix(exprs_mat)) {
    stop(
      paste(
        "[get_rhl30_scores]: exprs_mat is not a matrix. Please make sure",
        "exprs_mat is matrix"
      )
    )
  }

  rhl30_predictor_df <- 
    dplyr::filter(rhl30_model_df, gene_type == "model")

  if (! all(rhl30_predictor_df$gene_name %in% rownames(exprs_mat))) {
    missing.gene <- ! rhl30_predictor_df$gene_name %in% rownames(exprs_mat)
    stop(
      paste(
        "[get_rhl30_scores]: Following genes missing from gene expression 
        matrix:", 
        paste(rhl30_predictor_df[missing.gene, "gene_name"], 
              collapse = ", ")
        )
    )
  }

  model.coef <- 
    matrix(
      rhl30_predictor_df$coefficient, 
      1, nrow(rhl30_predictor_df),
      dimnames = list(NULL, rhl30_predictor_df$gene_name)
  )

  scores <- model.coef %*% exprs_mat[colnames(model.coef), ]
  scores_vector <- scores[1, ]

  scores_df <- 
    tibble::tibble(
      sample_id = names(scores_vector),
      score = unname(scores_vector), 
    )
  return(scores_df)
}
tinyheero/RHL30 documentation built on March 5, 2020, 2:07 a.m.