R/setters_and_getters.R
In topr: Create Custom Plots for Viewing Genetic Association Results

Documented in get_gene_coords get_genes_by_Gene_Symbol get_lead_snps get_sign_and_sugg_loci get_snps_within_region get_topr_colors get_top_snp

set_size_shape_alpha <- function(df,size,shape,alpha,locuszoomplot=F){
  df <- set_size(df,size, locuszoomplot = locuszoomplot)
  df <- set_shape(df,shape,locuszoomplot = locuszoomplot)
  df <- set_alpha(df,alpha)
  if(!locuszoomplot){
    df <- set_shape_by_impact(df)
  }
  return(df)
}

set_shape_by_impact <- function(dat){
  for(i in seq_along(dat)){
    if("Max_Impact" %in% colnames(dat[[i]])){
      dat[[i]]$shape0 <- ifelse(dat[[i]]$Max_Impact == "MODERATE", 17, dat[[i]]$shape)
      dat[[i]]$shape <- ifelse(dat[[i]]$Max_Impact =="HIGH", 8, dat[[i]]$shape0)
    }
  }
  return(dat)
}

set_annotate <- function(dat,annotate){
  if(! is.null(annotate)){
    for(i in seq_along(dat)){
      if(!is.null(annotate) & nrow(dat[[i]]>0)){
        if(is.vector(annotate) & (length(annotate) >= i))
          dat[[i]]$annotate <- annotate[i]
        else
          dat[[i]]$annotate <- annotate
      }
    }
  }
  return(dat)
}

set_color <- function(dat,color,shades_alpha=0.5, use_shades=T, chr_lightness=0.80, chr=NULL){
  if(length(color) < length(dat))
    stop(paste("There are ",length(dat), " datasets, but only ",length(color), " color. Add more colors with the color argument, eg. color=c(\"blue\",\"yellow\",\"green\", etc)"))
  for(i in seq_along(dat)){
    if(!is.null(color) & nrow(dat[[i]]>0)){
      if(is.vector(color) & (length(color) >= i)){
        if(use_shades || !(is.null(chr))){
          dat[[i]]$color <- color[i]
        }
        else{                  
          even_chr_color <- lightness(color[i], get_chr_lightness(chr_lightness,i) )
          dat[[i]]$color <- ifelse((as.numeric(dat[[i]]$CHROM) %% 2) == 0, even_chr_color , color[i]) 
        }
      }
      else
        dat[[i]]$color <- color
    }
  }
  return(dat)
}

get_chr_lightness <- function(chr_lightness, i){
   if(is.null(chr_lightness) || !is.numeric(chr_lightness))
     chr_lightness <- 0.8
   if(is.vector(chr_lightness)){
     if(length(chr_lightness) >= i)
        chr_lightness <- chr_lightness[i]
    else
        chr_lightness <- chr_lightness[length(chr_lightness)]
   }
  return(chr_lightness)
}




set_size <- function(dat,size, locuszoomplot = locuszoomplot){
  if(is.null(size))
    size <- 1
  for(i in seq_along(dat)){
    if(!is.null(size) & nrow(dat[[i]]>0)){
      if(is.vector(size) &  length(size) >= i )
        dat[[i]]$size <- size[i]
      else
        dat[[i]]$size <- size
    }
    if(locuszoomplot & ("R2" %in% colnames(dat[[i]]))){
      dat[[i]]$size <- ifelse(dat[[i]]$R2 == 1, dat[[i]]$size*1.5, dat[[i]]$size)
      #handle NA values for R2
      dat[[i]]$size[is.na(dat[[i]]$size)] <- size
    }
  }
  return(dat)
}

set_alpha <- function(dat,alpha){
  if(is.null(alpha))
    alpha <- 0.7
  for(i in seq_along(dat)){
    if(!is.null(alpha) & nrow(dat[[i]]>0)){
      if(is.vector(alpha) & (length(alpha) >= i))
        dat[[i]]$alpha <- alpha[i]
      else
        dat[[i]]$alpha <- alpha
    }
  }
  return(dat)
}

set_shape <- function(dat,shape,locuszoomplot=F){
  if(is.null(shape))
    shape <- 19
  for(i in seq_along(dat)){
    if(!is.null(shape) & nrow(dat[[i]]>0)){
      if(is.vector(shape) & (length(shape) >= i))
        dat[[i]]$shape <- shape[i]
      else
        dat[[i]]$shape <- shape
    }
    if(locuszoomplot & ("R2" %in% colnames(dat[[i]]))){
        dat[[i]]$shape <- ifelse(dat[[i]]$R2 == 1, 18, dat[[i]]$shape)
        dat[[i]]$shape[is.na(dat[[i]]$shape)] <- shape
    }
  }
  return(dat)
}

set_log10p <- function(dat, ntop){
  for(i in seq_along(dat)){
    df <- dat[[i]]
    if(ntop > length(dat)){ ntop <- length(dat)}
    if(i <= ntop){
      dat[[i]] <- df %>% dplyr::mutate(log10p=-log10(P))

    }else{
      dat[[i]] <- df %>% dplyr::mutate(log10p=log10(P))
    }
  }
  return(dat)
}

add_log10p_wo_trans <- function(dat, ntop){
  for(i in seq_along(dat)){
    df <- dat[[i]]
    if(ntop > length(dat)){ ntop <- length(dat)}
    if(i <= ntop){
      dat[[i]] <- df %>% dplyr::mutate(log10p=P)
      
    }else{
      dat[[i]] <- df %>% dplyr::mutate(log10p=P)
    }
  }
  return(dat)
}


get_db <- function(build){
   if(is.character(build) || is.numeric(build)){
    if(tolower(build) %in% c("38","hg38","grch38"))
      db <- enshuman::hg38
    else if(tolower(build) %in% c("37","hg37","grch37"))
      db <- enshuman::hg37
    else{
      ext_db <- get(build)
      if(is.data.frame(class(ext_db))){
         db <- ext_db
      }
      else if(is.list(class(ext_db))){
        warning(paste("The provided build is a list. It has to be either a number, string or a data frame!\n Using the default build enshuman::hg38 instead" ))
        db <- enshuman::hg38
      }
      else{
        warning(paste("Could not find a build called [",build,"] to use for annotation. \n Using the default build GRCh38 (build=38) instead. ", sep=""))
        db <- enshuman::hg38
      }
    }
  }
  else if(is.data.frame(build)){
       db <- build 
  }
  else if(is.list(class(build))){
    warning(paste("The provided build is a list. It has to be either a number, string or a data frame! \n Using the default build GRCh38 (build=38) instead."))
    db <- enshuman::hg38
    }
  else{
    warning(paste("The build is not in the correct format. It has to be either a number,string or a data frame!\nUsing the default build GRCh38 (build=38) instead. ", sep=""))
    db <- enshuman::hg38
  }
  return(db)    
}


get_genes <- function(chr,xmin=0,xmax=NULL,protein_coding_only=FALSE, build=38){
  if(is.null(xmax)){
    xmax <- chr_lengths[chr_lengths$V1 == chr,]$V2
  }
  chr <- gsub("chr", "", chr)
  if(chr == "23"){ chr="X"}
  
  genes <-  get_db(build=build) %>% dplyr::filter(chrom==chr) %>% dplyr::select(-exon_chromstart,-exon_chromstart) %>% dplyr::filter(gene_start>xmin & gene_start<xmax | gene_end > xmin & gene_end < xmax | gene_start < xmin & gene_end>xmax | gene_start == xmin | gene_end == xmax)

  if(protein_coding_only)  
      genes <- genes %>% dplyr::filter(biotype=="protein_coding")
   return(genes)
}

get_exons <- function(chr,xmin=0,xmax=NULL,protein_coding_only=FALSE, build=38){
  if(is.null(xmax)){
    xmax <- chr_lengths[chr_lengths$V1 == chr,]$V2
  }
  chr <- gsub("chr", "", chr)
  if(chr == "23"){ chr="X"}
  
  exons <- get_db(build=build)  %>% dplyr::filter(chrom==chr)  %>% dplyr::filter(gene_start>xmin & gene_start<xmax | gene_end > xmin & gene_end < xmax | gene_start < xmin & gene_end>xmax | gene_start == xmin | gene_end == xmax)
  
  if(protein_coding_only)  
    exons <- exons %>% dplyr::filter(biotype=="protein_coding")
  return(exons)
}


get_chr_from_df <- function(df){
  #take the chromosome from the dataframe
  if(length(unique(df$CHROM)) == 1)
    chr <- unique(df$CHROM)
  else
    stop("More than one chromosome in the input data, please select which chromosome to display by setting the chr argument (e.g. chr='chr10') or use input data with just one chromosome")
  return(chr)
}


get_shades <- function(chr_lengths_and_offsets,dat,ntop=ntop,include_chrX=FALSE,ymin=NULL,ymax=NULL){
  coords <- get_x1_x2(chr_lengths_and_offsets)
  y1 <- c(rep(0, coords$n_offsets))  #if there is no bottom plot
  if(is.null(ymin))
    ymin <- get_ymin(dat)
  if(length(dat) > ntop)
    y1 <- c(rep(ymin, coords$n_offsets))
  if(is.null(ymax))
    ymax <- get_ymax(dat)
   
  shades <- data.frame(x1 = coords$x1,
                        x2 = coords$x2,
                        y1 = y1,
                        y2 = c(rep(ymax, coords$n_offsets)))

  return(shades)
}

get_x1_x2 <- function(chr_lengths_and_offsets){
  chrs <- chr_lengths_and_offsets$CHROM
  n_offsets <- floor(length(chrs)/2)
  offsets <- stats::setNames(chr_lengths_and_offsets$offset,chr_lengths_and_offsets$CHROM)

  x1 <- c()
  even_no <- as.numeric(subset(chrs, chrs %% 2 == 0))
  for(i in 1:length(even_no)){
    x1 <- c(x1, offsets[[even_no[i]]])
  }
  x2 <- c()
  odd_no <- as.numeric(subset(chrs, chrs %% 2 != 0))
 
   if(length(odd_no) > 1){
    for(i in 2:length(odd_no)){
      x2 <- c(x2, offsets[[odd_no[i]]])
    }
  }
  else if(length(odd_no) == length(even_no) & length(odd_no) == 1) {
    last_chr <- even_no[length(even_no)]
    last_chr_length_df <- chr_lengths_and_offsets %>% filter(CHROM == last_chr ) 
    last_chr_length <- last_chr_length_df$m
    final_x2 <- offsets[[last_chr]] + last_chr_length*1.02 
    x2 <- c(x2, final_x2)
  }
  if(length(x1) > length(x2)){
    last_chr <- even_no[length(even_no)]
    last_chr_length_df <- chr_lengths_and_offsets %>% filter(CHROM == last_chr ) 
    last_chr_length <- last_chr_length_df$m
    final_x2 <- offsets[[last_chr]] + last_chr_length*1.02 
    x2 <- c(x2, final_x2)
  }
  return(list(x1=x1, x2=x2,n_offsets=n_offsets))
}




get_ymax <- function(dat){
  ymax <- 0
  if(is.data.frame(dat)) dat <- list(dat)
  for(i in seq_along(dat)){
    if(length(dat[[i]]$log10p) > 0){
      tmp <- max(dat[[i]]$log10p)
      if(tmp>ymax){
        ymax <- tmp
      }
    }
  }
  return(ymax)
}

get_ymin <- function(dat){
  ymin <- 5
  if(is.data.frame(dat)) dat <- list(dat)
    for(i in seq_along(dat)){
      if(length(dat[[i]]$log10p) > 0){
        tmp <- min(dat[[i]]$log10p)
        if(tmp<ymin){
          ymin <- tmp
      }
    }
    }
  return(ymin)
}

get_chr_lengths_and_offsets <- function(dat, get_chr_lengths_from_data){
  if(get_chr_lengths_from_data){
    chrs <- get_chrs_from_data(dat) %>% as.numeric()
    chrs2plot <- get_max_value_by_chr_from_data(dat, chrs)
  }
  else{
    #create the offsets from the internal chr_lengths data
    chr_lengths$CHROM <- gsub("chr","", chr_lengths$V1)
    chr_lengths <- chr_lengths %>% dplyr::filter(! V1 %in% c("chrM") )
    chr_lengths[chr_lengths$CHROM=="X",'CHROM'] <- "23"
    chr_lengths[chr_lengths$CHROM=="Y",'CHROM'] <- "24"
    chr_lengths$CHROM <- as.integer(chr_lengths$CHROM)
    chrs <- c(1:23)
    chrs2plot <- chr_lengths %>% filter(CHROM %in% chrs)
  }
  chr_lengths_and_offsets <- chrs2plot %>% dplyr::group_by(CHROM) %>% dplyr::summarize(m=V2) %>% dplyr::mutate(offset=cumsum(as.numeric(lag(m, default=0))))
  return(chr_lengths_and_offsets)
}


get_ticks <- function(dat,chr_lengths_and_offsets,chr_ticknames,chr_map, show_all_chrticks=FALSE, hide_chrticks_from_pos=17, hide_chrticks_to_pos=NULL, hide_every_nth_chrtick=2, get_chr_lengths_from_data=T){
  df <- dat[[1]]
  for(i in seq_along(dat)){ if(length(unique(dat[[i]]$CHROM))  > length(unique(df$CHROM))){ df <- dat[[i]] } }
  ticknames=NULL
   if(!is.null(chr_ticknames)){
    if(length(names(chr_map)) == length(chr_ticknames))
      ticknames=chr_ticknames
    else
      warning(paste0("The length of the provided chr_ticknames vector [",length(chr_ticknames),"] does not match the number of chromosomes in the dataset(s) [", length(chr_map), "]. Using the default chr_ticknames instead!!"))
  }
  if(is.null(chr_ticknames)){
    if(get_chr_lengths_from_data){
    chrs <- names(chr_map)
    numeric_chrs <- chrs[grepl('^-?[0-9.]+$', chrs)] %>% as.numeric() %>% sort()
    non_numeric_chrs <- chrs[!grepl('^-?[0-9.]+$', chrs)]
    chr_order <- append(c(numeric_chrs), c(non_numeric_chrs))
    if(is.null(hide_chrticks_to_pos))
      hide_chrticks_to_pos <- length(numeric_chrs)
    chr_ticknames <- chr_order
  
    if(length(numeric_chrs) > hide_chrticks_from_pos){
      if(! show_all_chrticks){
        hide_chr <- numeric_chrs[seq(hide_chrticks_from_pos, hide_chrticks_to_pos, hide_every_nth_chrtick)]
        chr_ticknames[(chr_ticknames %in% hide_chr) == TRUE] <- ""
      }
      else{
        chr_ticknames <- chr_order
      }
    }
    ticknames <- chr_ticknames
    }
    else{
      ticknames <- c(1:16, '',18, '',20, '',22, 'X')
    }
  }
 tickpos <-chr_lengths_and_offsets %>% dplyr::group_by(CHROM)%>%
    dplyr::summarize(pm=offset+(m/2))%>%
    dplyr::pull(pm)
  names(tickpos) <- NULL
  return(list(names=ticknames, pos=tickpos))
}

get_pos_with_offset4list <- function(dat, offsets){
  for(i in seq_along(dat)){
    dat[[i]] <- get_pos_with_offset(dat[[i]], offsets)
  }
  return(dat)
}

get_pos_with_offset <- function(df,offsets){
  df$POS_orig <- df$POS
  df <- df %>% dplyr::mutate(POS=POS_orig+offsets[CHROM])
  return(df)
}


#' Get the index/lead variants
#'
#' @description
#'
#' \code{get_lead_snps()} Get the top variants within 1 MB windows of the genome with association p-values below the given threshold 
#'
#'
#' @param df Dataframe, GWAS summary statistics
#' @param genome_wide_thresh A number. P-value threshold for genome wide significant loci (5e-08 by default)
#' @param suggestive_thresh A number. P-value threshold for suggestive loci (1e-06 by default)
#' @param flank_size A number (default = 1e6). The size of the flanking region for the significant and suggestitve snps.
#' @param region_size A number (default = 1e6). The size of the region for top snp search. Only one snp per region is returned.
#' @return List of genome wide and suggestive loci.
#' @export
#' @examples
#' \dontrun{
#' get_sign_and_sugg_loci(CD_UKBB)
#' }
#'

get_sign_and_sugg_loci <- function(df, genome_wide_thresh = 5e-8, suggestive_thresh = 1e-6, flank_size = 1e6, region_size = 1e6) {
  genome_wide_snps <- data.frame()
  suggestive_snps <- data.frame()
  leads <- df |> get_lead_snps(thresh = suggestive_thresh, region_size = region_size, verbose = FALSE)
   if (nrow(leads) == 0) {
    return(list(genome_wide_snps = genome_wide_snps, suggestive_snps = suggestive_snps))
  }
  genome_wide <- leads |> filter(P < genome_wide_thresh)
  suggestive <- leads |> filter(P < suggestive_thresh & P >= genome_wide_thresh)
  if (nrow(genome_wide) != 0) {
    for (i in 1:nrow(genome_wide)) {
      genome_wide_snps <- bind_rows(genome_wide_snps, df |>
                                      filter(CHROM == genome_wide$CHROM[i] & POS >= genome_wide$POS[i] - (flank_size/2) & POS <= genome_wide$POS[i] + (flank_size/2))) |>
        distinct()
    }
  }
  if (nrow(suggestive) != 0) {
    # remove suggestive that are also genome wide snps
    suggestive <- suggestive |> anti_join(genome_wide, by = c("CHROM", "POS"))
    if (nrow(suggestive) != 0) {
      for (i in 1:nrow(suggestive)) {
        suggestive_snps <- bind_rows(suggestive_snps, df |>
                                       filter(CHROM == suggestive$CHROM[i] & POS >= suggestive$POS[i] - (flank_size/2) & POS <= suggestive$POS[i] + (flank_size/2))) |>
          distinct()
      }
    }
  }
  return(list(genome_wide_snps = genome_wide_snps, suggestive_snps = suggestive_snps))
}


#' Get the index/lead variants
#'
#' @description
#'
#' \code{get_lead_snps()} Get the top variants within 1 MB windows of the genome with association p-values below the given threshold 
#'
#'
#' @param df Dataframe
#' @param thresh A number. P-value threshold, only extract variants with p-values below this threshold (5e-08 by default)
#' @param protein_coding_only Logical, set this variable to TRUE to only use protein_coding genes for annotation
#' @param chr String, get the top variants from one chromosome only, e.g. chr="chr1"
#' @param .checked Logical, if the input data has already been checked, this can be set to TRUE so it wont be checked again (FALSE by default)
#' @param verbose Logical, set to TRUE to get printed information on number of SNPs extracted
#' @param keep_chr Logical, set to FALSE to remove the "chr" prefix before each chromosome if present (TRUE by default) 
#' @return Dataframe of lead variants. Returns the best variant per MB (by default, change the region size with the region argument) with p-values below the input threshold (thresh=5e-08 by default)
#' @export
#' @inheritParams regionplot
#' @examples
#' \dontrun{
#' get_lead_snps(CD_UKBB)
#' }
#'
get_lead_snps <- function(df, thresh=5e-08,region_size=1000000,protein_coding_only=FALSE,chr=NULL, .checked=FALSE, verbose=NULL, keep_chr=TRUE){
  variants <- df[0,]
  dat <- df
  if(is.data.frame(dat)) dat <- list(dat)
  chrPrefix=0
  
  dat[[1]] <- dat_column_chr_pos_check(dat[[1]]) 
  
  if("CHROM"  %in% colnames(dat[[1]])){
    if(!is.integer(dat[[1]][1,"CHROM"])){chrPrefix=1}
  }
  if(! is.numeric(region_size)){
    region_size <- convert_region_size(region_size)
  }
  if(! .checked){
    dat <- dat_check(dat, verbose=verbose)
  }

  df <- dat[[1]]
  if(! is.null(chr)){
    chr <- gsub("chr","", chr)
    df <- df %>% dplyr::filter(CHROM == chr)
  }
  
  df <- df %>% dplyr::filter(P<thresh)
  if(protein_coding_only & ("biotype" %in% colnames(df))){
    df <- df %>% dplyr::filter(biotype == "protein_coding")
  }
  if(nrow(df) > 0){
  	lead_snps <- do.call(rbind,
  		lapply(split(df, df$CHROM), function (df_by_chr) {
  			res <- data.frame()
  			df_by_chr <- dplyr::arrange(df_by_chr, P)
  			while (nrow(df_by_chr) > 0) {
  				lead_snp <- dplyr::slice(df_by_chr, 1)
  				res <- dplyr::bind_rows(res, lead_snp)
  				df_by_chr <- dplyr::filter(df_by_chr, abs(POS - lead_snp$POS) > region_size / 2)
  			}
  			res
  		}))
    variants <- lead_snps %>% dplyr::distinct(CHROM, POS, .keep_all = T)
    variants$color <-  dat[[1]][1,"color"]
    if(! is.null(verbose)){
      if(verbose){
        print(paste("Found ",length(variants$POS), " index/lead variants with a p-value below ", thresh, sep=""))
        print(paste("Index/lead variant: the top variant within a",format(region_size, scientific = F),"bp region_size"), sep="")
      }
    }
  }else{
    if(is.null(verbose)){
      print(paste("There are no SNPs with p-values below ",thresh, " in the input dataset. Use the [thresh] argument to lower the threshold.",sep=""))
    }else if(verbose){
      print(paste("There are no SNPs with p-values below ",thresh, " in the input dataset. Use the [thresh] argument to lower the threshold.",sep=""))
    }
    
  }
  if("tmp" %in% colnames(variants)){
    variants <- variants %>% dplyr::select(-tmp)
  }
  if(nrow(variants) > 0){
    if(keep_chr & chrPrefix){
      variants$CHROM <- gsub("(chr|CHR|Chr)","", variants$CHROM)
      variants$CHROM  <- paste0("chr", variants$CHROM)
    }
  }
  return(variants)
}


#' Get the genetic position of a gene by its gene name
#'
#' @description
#'
#' \code{get_genes_by_Gene_Symbol()} Get genes by their gene symbol/name
#' Required parameters is on gene name or a vector of gene names
#'
#' @param genes A string or vector of strings representing gene names, (e.g. "FTO") or  (c("FTO","NOD2"))
#' @param chr A string, search for the genes on this chromosome only, (e.g chr="chr1")
#' @param build A string, genome build, choose between builds 37 (GRCh37) and 38 (GRCh38) (default is 38)
#' @return Dataframe of genes
#' @export
#'
#' @examples
#' \dontrun{
#'   get_genes_by_Gene_Symbol(c("FTO","THADA"))
#' }
#'
get_genes_by_Gene_Symbol <- function(genes, chr=NULL, build=38){
  genes_df <- get_db(build) %>% dplyr::filter(gene_symbol %in% genes) 
  if(! is.null(chr)){
    chr <- gsub('chr','',chr)
    genes_df <- genes_df %>% dplyr::filter(chrom == paste("chr",chr,sep=""))
  }
  genes_df <- genes_df %>% dplyr::mutate(POS = gene_start + round((gene_end-gene_start)/2)) %>% dplyr::rename(Gene_Symbol=gene_symbol,CHROM=chrom)
  dist_genes <- genes_df %>% dplyr::distinct(CHROM,gene_start,gene_end, .keep_all = T)
  return(dist_genes)
}


#' Get the genetic position of a gene by gene name
#'
#' @description
#'
#' \code{get_gene_coords()} Get the gene coordinates for a gene
#' Required parameter is gene name
#'
#' @param gene_name A string representing a gene name (e.g. "FTO")
#' @param chr A string, search for the genes on this chromosome only, (e.g chr="chr1")
#' @param build A string, genome build, choose between builds 37 (GRCh37) and 38 (GRCh38) (default is 38)
#' @return Dataframe with the gene name and its genetic coordinates
#' @export
#'
#' @examples
#' \dontrun{
#' get_gene_coords("FTO")
#' }
#'

get_gene_coords  <- function(gene_name,chr=NULL, build=38){
  gene <- get_db(build) %>% dplyr::filter(gene_symbol == gene_name) %>% dplyr::select(-exon_chromstart,-exon_chromend)
  if(! is.null(chr)){
    chr <- gsub("chr", "", chr)
    chr <- paste("chr",chr,sep="")
    gene <- gene  %>% dplyr::filter(chrom == chr) 
  }
  if(is.null(gene)){
    print(paste("Could not find a gene with the gene name: [", gene_name, "] in build ",build, sep=""))
  }
  return(gene)
}

get_legend<-function(p1){
  tmp <- ggplot_gtable(ggplot_build(p1))
  leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
  legend <- tmp$grobs[[leg]]
  return(legend)
}



#' Get the top hit from the dataframe
#'
#' @description
#'
#' \code{get_top_snp()} Get the top hit from the dataframe
#' All other input parameters are optional
#'
#' @param df Dataframe containing association results
#' @param chr String, get the top hit in the data frame for this chromosome. If chromosome is not provided, the top hit from the entire dataset is returned.

#' @return Dataframe containing the top hit
#' @export
#'
#' @examples
#' \dontrun{
#' get_top_snp(CD_UKBB, chr="chr1")
#' }
#'
get_top_snp <- function(df, chr=NULL){
  if (!is.null(chr)) {
    chr_tmp <- gsub('chr','',chr)
    chr <- paste("chr",chr_tmp, sep="")
    df <- df %>%
      filter(CHROM == chr)
  } else {
    print("Returning the top hit over the entire genome, since chromosome was not provided as an argument")
  }
  top_hit <- df %>%
    dplyr::arrange(P) %>%
    head(n = 1)

  return(top_hit)
}


#' Get the top hit from the dataframe
#'
#' @description
#'
#' \code{get_topr_colors()} Get the top hit from the dataframe
#' All other input parameters are optional
#'
#' @return Vector of colors used for plotting
#' @export
#'
#' @examples
#' \dontrun{
#' get_topr_colors()
#' }
#'
get_topr_colors <- function(){
  return(c("darkblue","#E69F00","#00AFBB","#999999","#FC4E07","darkorange1","darkgreen","blue","red","magenta","skyblue","grey40","grey60","yellow","black","purple","orange","pink","green","cyan"))
}

set_genes_pos_adj <- function(genes, offsets){
  #CHROM,POS,Gene_Symbol
  genes <- genes %>% dplyr::mutate(CHROM=gsub('chr','',CHROM))
  genes[genes$CHROM=='X', 'CHROM'] <- "23"
  genes$CHROM <- as.integer(genes$CHROM)
  genes <- genes %>% dplyr::mutate(pos_adj=POS+offsets[CHROM])
  return(genes)
}

#' Get SNPs/variants within region
#'
#' @description
#'
#' \code{get_snps_within_region()}
#'
#' @param df data frame of association results with the columns CHR and POS
#' @param region A string representing the genetic region (e.g chr16:50693587-50734041)
#' @param chr A string, chromosome (e.g. chr16)
#' @param xmin An integer, include variants with POS larger than xmin
#' @param xmax An integer, include variants with POS smaller than xmax
#' @param keep_chr Deprecated: Logical, set to FALSE to remove the "chr" prefix before each chromosome if present (TRUE by default) 
#' @return the variants within the requested region
#' @export
#'
#' @examples
#' \dontrun{
#' get_snps_within_region(CD_UKBB, "chr16:50593587-50834041")
#' }
#'

get_snps_within_region <- function(df, region, chr=NULL, xmin=NULL, xmax=NULL,keep_chr=NULL){
  snps <- NULL
  if (!missing(keep_chr)) deprecated_argument_msg(keep_chr)
  df <- dat_column_chr_pos_check(df)
  if(!is.null(region)){
    tmp <- unlist(stringr::str_split(region, ":"))
    chr <- tmp[1]
    tmp_pos <- unlist(stringr::str_split(tmp[2], "-"))
    xmin <- as.numeric(tmp_pos[1])
    xmax <- as.numeric(tmp_pos[2])
  }
  if(!is.null(chr) & !is.null(xmin) & !is.null(xmax)){
    snps <- df %>% filter(CHROM == chr & POS >= xmin & POS <= xmax)
    if(length(snps$POS) < 1){
      print(paste("There are no SNPs within the region: ", chr, ":", xmin, "-", xmax, sep=""))
    }
  }
  return(snps)
}

get_chrs_from_data = function(dat){
  all_chrs=NULL
  for(i in seq_along(dat)){
    unique_chrs=unique(dat[[i]]$CHROM)
    all_chrs <- c(all_chrs, unique_chrs)
  }
  chrs <- unique(all_chrs) %>% sort()
  return(chrs)
}


get_max_value_by_chr_from_data = function(dat, chrs){
  max_chr_values=c()
  for(i in 1:(length(chrs))){
    max_chr_val=0
    for(j in seq_along(dat)){ #for(j in seq_along(dat)){
      dat_chr <- dat[[j]] %>% dplyr::filter(CHROM==chrs[i])
      if(nrow(dat_chr)> 0){
        max_val <- max(dat_chr$POS, na.rm = T)
        if(max_val > max_chr_val){
          max_chr_val <- max_val  
        }
      }
    }
    max_chr_values[i] <- max_chr_val*1.02
  }
  chr_lengths <- data.frame("CHROM" = unique(chrs), "V2" = max_chr_values)

  return(chr_lengths)
}
Any scripts or data that you put into this service are public.
topr documentation built on June 22, 2024, 9:59 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
topr
Create Custom Plots for Viewing Genetic Association Results

R/setters_and_getters.R
In topr: Create Custom Plots for Viewing Genetic Association Results

Documented in get_gene_coords get_genes_by_Gene_Symbol get_lead_snps get_sign_and_sugg_loci get_snps_within_region get_topr_colors get_top_snp

Try the topr package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

topr Create Custom Plots for Viewing Genetic Association Results

R/setters_and_getters.R In topr: Create Custom Plots for Viewing Genetic Association Results

Documented in get_gene_coords get_genes_by_Gene_Symbol get_lead_snps get_sign_and_sugg_loci get_snps_within_region get_topr_colors get_top_snp

Try the topr package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

topr
Create Custom Plots for Viewing Genetic Association Results

R/setters_and_getters.R
In topr: Create Custom Plots for Viewing Genetic Association Results