#' Formatting Data for Regional Association ComparER
#'
#' This group of functions allows you to create a plot of -log10(P-values) of an association study by their genomic position, for example, the results of a GWAS or eQTL study. This function carries out the formatting necessary for your association data to be compatible with the plotting functions.
#' @param assoc_data required. A dataframe that has columns containing the chromosome, physical position, and p-values or -log10(p-values) of the association, and can optionally have columns containing R2 information for LD in the region, or rsID numbers for the associated SNPs.
#' @param chr_col required. numeric or character. index or name of column in assoc_data containing chromosome information.
#' @param pos_col required. numeric or character. index or name of column in assoc_data containing genomic position information.
#' @param log10p_col required, if no p_col specified. numeric or character. index or name of column in assoc_data containing -log10(p-value)s. Takes precedence over p_col when both are given.
#' @param p_col required, if no log10p_col column specified. numeric or character. index or name of column in assoc_data containing p-values.
#' @param ld_col optional. numeric or character. Required if you want to use the LD data in your data set in your plot, index or name of column in assoc_data containing LD information, e.g. R2 or D' values
#' @param rs_col optional. numeric or character. Required if you want to use ldRACER to pull LD information from the 1000 genomes phase III project, or if you want to make a scatter comparison plot
#'
#' @return A data frame in which the specified columns have been renamed to
#'   CHR, POS, LOG10P (or P, with LOG10P computed from it), and optionally
#'   RS_ID and LD. CHR, POS and LOG10P are coerced to numeric. When ld_col is
#'   given, LD is coerced to numeric and a factor column LD_BIN is added with
#'   levels "1.0-0.8", "0.8-0.6", "0.6-0.4", "0.4-0.2", "0.2-0.0" and "NA".
#'
#' @keywords association plot linkage disequilibrium
#' @concept GWAS
#' @export
#' @examples
#' data(mark3_bmd_gwas)
#' head(formatRACER(assoc_data = mark3_bmd_gwas, chr_col = 3, pos_col = 4, p_col = 11, rs_col = 2))
formatRACER <- function(assoc_data, chr_col, pos_col, p_col=NULL, log10p_col=NULL, ld_col=NULL, rs_col = NULL){
  if (missing(assoc_data)) {
    stop("Please provide a data frame of association data.", call. = FALSE)
  }
  if (missing(chr_col)) {
    stop("Please specify which column contains chromosome information.", call. = FALSE)
  } else if (missing(pos_col)) {
    stop("Please specify which column contains genomic position information.", call. = FALSE)
  } else if (is.null(log10p_col) && is.null(p_col)) {
    stop("Please specify which column contains p-values or -log10(p-values).", call. = FALSE)
  }

  # Rename one column of `data` to `new_name`. `col` is either a column index
  # or a column name; `label` is only used to build the error messages.
  rename_column <- function(data, col, new_name, label) {
    if (length(col) != 1L || is.na(col)) {
      stop("The ", label, " column must be given as a single column index or name.",
           call. = FALSE)
    }
    if (is.numeric(col)) {
      # is.numeric() accepts integer as well as double indices, unlike
      # class(col) == "numeric", which silently skipped e.g. 3L.
      if (col < 1 || col > ncol(data)) {
        stop("The ", label, " column index you specified is outside the association data frame.",
             call. = FALSE)
      }
      target <- col
    } else if (is.character(col)) {
      target <- which(colnames(data) == col)
      if (length(target) == 0L) {
        stop("The ", label, " column you specified is not in the association data frame.",
             call. = FALSE)
      }
    } else {
      stop("The ", label, " column must be given as a single column index or name.",
           call. = FALSE)
    }
    colnames(data)[target] <- new_name
    data
  }

  # Going through character first turns factors into their labels rather than
  # their internal integer codes.
  to_numeric <- function(x) as.numeric(as.character(x))

  message("Formating association data...")
  # Normalise the container up front so that `$` and `colnames<-` behave the
  # same for every kind of tabular input.
  assoc_data <- as.data.frame(assoc_data)
  assoc_data <- rename_column(assoc_data, chr_col, "CHR", "chromosome")
  assoc_data <- rename_column(assoc_data, pos_col, "POS", "position")

  message("Processing -log10(p-values)...")
  if (!is.null(log10p_col)) {
    assoc_data <- rename_column(assoc_data, log10p_col, "LOG10P", "-log10p")
  } else {
    assoc_data <- rename_column(assoc_data, p_col, "P", "p-value")
    p_values <- assoc_data$P
    # Only coerce when needed, so numeric p-values are used at full precision.
    if (!is.numeric(p_values)) {
      p_values <- to_numeric(p_values)
    }
    assoc_data$LOG10P <- -log10(p_values)
  }

  if (!is.null(rs_col)) {
    assoc_data <- rename_column(assoc_data, rs_col, "RS_ID", "rsID")
  }

  if (!is.null(ld_col)) {
    message("Processing input LD information...")
    assoc_data <- rename_column(assoc_data, ld_col, "LD", "LD")
    assoc_data$LD <- to_numeric(assoc_data$LD)
    # include.lowest = TRUE puts an LD of exactly 0 into the "0.2-0.0" bin;
    # without it the lowest interval is (0, 0.2] and 0 would be labelled "NA".
    ld_bin <- as.character(cut(assoc_data$LD,
                               breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1.0),
                               labels = c("0.2-0.0", "0.4-0.2", "0.6-0.4", "0.8-0.6", "1.0-0.8"),
                               include.lowest = TRUE))
    # Missing or out-of-range LD values get the literal level "NA".
    ld_bin[is.na(ld_bin)] <- "NA"
    assoc_data$LD_BIN <- factor(ld_bin,
                                levels = c("1.0-0.8", "0.8-0.6", "0.6-0.4", "0.4-0.2", "0.2-0.0", "NA"))
  }

  # Coerce the core columns to numeric; values that cannot be parsed
  # (e.g. non-numeric chromosome labels) become NA with a warning.
  message("Preparing association data...")
  assoc_data$POS <- to_numeric(assoc_data$POS)
  assoc_data$LOG10P <- to_numeric(assoc_data$LOG10P)
  assoc_data$CHR <- to_numeric(assoc_data$CHR)
  assoc_data
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.