R/get_plink_rsid_ranges.R

Defines functions get_plink_rsid_ranges

Documented in get_plink_rsid_ranges

#' @title get_plink_rsid_ranges
#' @description Returns rsid ranges for a file generated by
#'   'get_variants_in_genes'. For every gene the variants are ordered by
#'   position and the first and last variant ids are joined into a
#'   "first-last" range.
#' @param df data.frame with three required columns: 'variant_id',
#'   'variant_position', 'ensembl_gene_id'. The optional columns
#'   'start_position' and 'end_position' are used for the output columns of
#'   the same name when present; otherwise the positions of the first and
#'   last variant are reported instead. Rows whose 'ensembl_gene_id' is NA
#'   are ignored.
#' @return data.frame with one row per gene (in order of first appearance)
#'   and the columns 'ensembl_gene_id', 'start_position', 'end_position',
#'   'start_variant', 'end_variant' and 'variant_range'; the ordered rsid
#'   ranges can be directly inputted to plink2. If no row has a non-missing
#'   gene id, an empty data.frame is returned.
#' @author flassen
#' @export
get_plink_rsid_ranges <- function(df) {

  # check input
  assume <- c("variant_id", "variant_position", "ensembl_gene_id")
  stopifnot(assume %in% colnames(df))
  stopifnot(nrow(df) > 0)

  # 'start_position'/'end_position' are not part of the required input.
  # Reading a missing column with `$` yields NULL, which data.frame() drops
  # silently, so pick an existing column explicitly.
  has_col <- function(nm) nm %in% colnames(df)
  start_col <- if (has_col("start_position")) {
    "start_position"
  } else {
    "variant_position"
  }
  end_col <- if (has_col("end_position")) {
    "end_position"
  } else {
    "variant_position"
  }

  # a missing gene id cannot define a range
  genes <- unique(df$ensembl_gene_id)
  genes <- genes[!is.na(genes)]

  result <- lapply(genes, function(g) {
    # which() discards NA comparisons; a bare logical mask containing NA
    # would add all-NA rows to the subset and corrupt the range end
    r <- df[which(df$ensembl_gene_id == g), ]
    r <- r[order(r$variant_position), ]
    top <- utils::head(r, 1)
    bottom <- utils::tail(r, 1)

    data.frame(
      ensembl_gene_id = top$ensembl_gene_id,
      start_position = top[[start_col]],
      end_position = bottom[[end_col]],
      start_variant = top$variant_id,
      end_variant = bottom$variant_id,
      variant_range = paste0(top$variant_id, "-", bottom$variant_id)
    )
  })

  as.data.frame(do.call(rbind, result))
}
frhl/our documentation built on Feb. 5, 2021, 7:30 p.m.