# R/assign_lead_SNP.R

#' Assign lead SNP
#' 
#' Assign lead SNPs by creating a new boolean column "leadSNP" 
#' indicating whether a SNP has the smallest p-value ("P") 
#' within its group (see \code{grouping_vars}).
#' If multiple rows in a group have the same p-value, 
#'  the one with the largest effect size ("Effect") is used as the lead SNP.
#' Remaining ties are resolved by keeping the row that appears first 
#'  in \code{dat}.
#' 
#' If \code{dat} already contains a "leadSNP" column with at least one 
#' \code{TRUE} value, it is returned without reassigning lead SNPs.
#' 
#' If \code{dat} has no "Locus" column, a temporary placeholder column is 
#' added so that the default \code{grouping_vars} can be used; 
#' it is removed again before returning.
#' @param dat Fine-mapping data.table. 
#' Must contain the columns "SNP", "P" and "Effect", 
#' plus every column named in \code{grouping_vars}.
#' @param grouping_vars Column names to group by. 
#' One lead SNP is selected per unique combination of these columns.
#' @param verbose Print messages.
#' @returns Modified data.table.
#' 
#' @export 
#' @importFrom dplyr arrange desc group_by_at
#' @importFrom utils head
#' @importFrom data.table copy
#' @examples 
#' dat <- echodata::assign_lead_snp(dat = echodata::BST1)
assign_lead_snp <- function(dat,
                            grouping_vars = "Locus",
                            verbose = TRUE) {
    # Remember whether the placeholder column was added here, so that a
    # "Locus" column supplied by the caller is never dropped on the way out.
    added_dummy_locus <- !"Locus" %in% colnames(dat)
    if (added_dummy_locus) dat$Locus <- "dummy"

    if ((!"leadSNP" %in% colnames(dat)) ||
        sum(dat$leadSNP, na.rm = TRUE) == 0) {
        messager("+ leadSNP missing. Assigning new one by min p-value.",
                 v = verbose)
        missing_cols <- setdiff(c("SNP", "P", "Effect", grouping_vars),
                                colnames(dat))
        if (length(missing_cols) > 0) {
            stop("Column(s) missing from dat: ",
                 paste(missing_cols, collapse = ", "),
                 call. = FALSE)
        }
        # One key per row identifying its group; a single shared key
        # when no grouping variables are given.
        group_cols <- lapply(grouping_vars, function(v) dat[[v]])
        group_id <- if (length(group_cols) > 0) {
            do.call(paste, c(group_cols, sep = "\037"))
        } else {
            rep("", nrow(dat))
        }
        # Smallest P first, then largest Effect; NAs sort last and
        # remaining ties keep their original row order.
        ord <- order(dat$P, -xtfrm(dat$Effect))
        # The first row of each group in that ordering is its lead SNP.
        lead_rows <- ord[!duplicated(group_id[ord])]
        lead_snp <- dat$SNP[lead_rows]
        lead_group <- group_id[lead_rows]
        # Flag every row whose SNP matches the lead SNP of its own group.
        dat$leadSNP <- dat$SNP == lead_snp[match(group_id, lead_group)]
    }
    if (added_dummy_locus) dat$Locus <- NULL
    dat
}
# RajLabMSSM/echodata documentation built on Nov. 21, 2023, 8 a.m.