R/compare_GWASs.R
In GWASinspector: Comprehensive and Easy to Use Quality Control of GWAS Results

Documented in compare_GWASs

#' Compare summary statistics of GWAS result files
#'
#' This function compares the key metrics of previously inspected files.
#' This allows the user to check if the results of these studies are comparable (important when running a meta-analysis) and that there are no significant anomalies.
#'
#' @param input.file.list list, full path of the Inspector result files. This file is in RDS format and will be generated for each GWAS result file during the inspection algorithm.
#' @param output.path character, full path to the folder where output files should be saved.
#' @return Key metrics report and plots of previously inspected files are generated and saved in the specified output folder.
#'
compare_GWASs <- function(input.file.list, output.path)
{
  if(missing(input.file.list) || missing(output.path))
    stop('Function arguments are not set.', call. = FALSE)


  on.exit({

  })


  ##==========================

  output.path <- check_output_dir(output.path)


  #
  .QC <- new.env()

  .QC$package.name <- 'GWASinspector'

  .QC$package.description <- 'Comprehensive, efficient and easy to use quality control of genome-wide association study results'

  .QC$script.version <- '1.5.5'

  .QC$verbose <- FALSE
  #

  .QC$txt.report.path  <-  paste(output.path,'report.txt',sep = '/')
  .QC$precisionPlotPath <- paste(output.path,'precision_plot.png',sep = '/')
  .QC$skew_kurtPlotPath <- paste(output.path,'skew_kurt.plot.png',sep = '/')
  .QC$effsizePlotPath <- paste(output.path,'effect_size.png',sep = '/')



  .QC$input.file.list  <- check_if_files_exist_and_more_than_one(input.file.list)

  .QC$input.file.list <- lapply(.QC$input.file.list,
                            readRDS)

  for(i in 1:length(.QC$input.file.list))
  {
    .QC$input.file.list[[i]]$number = i
    #input.file.list[[i]]$effect.plot$plot_env$file.number = i
    .QC$input.file.list[[i]]$effect.plot$labels$x = i


    ## compatibility with older versions
    if(is.null(.QC$input.file.list[[i]]$effect_type_string))
      .QC$input.file.list[[i]]$effect_type_string = 'BETA'

    if(is.null(.QC$input.file.list[[i]]$tables$variable.summary.HQ))
      .QC$input.file.list[[i]]$tables$variable.summary.HQ = data.table()
  }


  create_comparison_plots(.QC$input.file.list,
                          'png',
                          .QC$precisionPlotPath,
                          .QC$skew_kurtPlotPath,
                          .QC$effsizePlotPath)

  multi_file_txt_report_file(.QC$input.file.list,
                             .QC$txt.report.path)


  multi_file_html_report_file(.QC,
                              output.path ,
                              'report.html')




}



############################################################

create_comparison_plots <- function(input.file.list,
                                    graphic.device ,
                                    precisionPlotPath ,
                                    skew_kurt ,
                                    effsizePlotPath)
{


  tryCatch( suppressWarnings(multi_study_precision_plot(input.file.list ,
                                                        graphic.device ,
                                                        precisionPlotPath)),
            error = function(err){
              message(paste('Warning in precision plot:',err$message))
            }
  )

  # skew-kurt plot
  tryCatch(multi_study_skew_kurt_plot(input.file.list, graphic.device , skew_kurt),
           error = function(err){
             message(paste('Warning in skewness-kurtosis plot:',err$message))
           }
  )

  # boxplot effects
  tryCatch(multi_study_eff_plot(input.file.list , graphic.device , effsizePlotPath),
           error = function(err){
             message(paste('Warning in effect-size comparison plot:',err$message))
           }
  )
}


check_if_files_exist_and_more_than_one <- function(input.file.list)
{

  if(length(input.file.list) < 2)
    stop('At least 2 study files should be selected.')



  input.file.exists = sapply(input.file.list, check_each_GWAS_file)
  input.file.list = input.file.list[which(input.file.exists)]

  if(length(input.file.list) < 2)
    stop('Not enough file for comparison',call. = FALSE)
  else
    return(input.file.list)
}


check_each_GWAS_file <- function(GWAS.file.path)
{
  if(!file.exists(GWAS.file.path))
  {
    message(paste('GWAS file not found:' , GWAS.file.path))
    return(FALSE)
  }
  else{
    return(TRUE)
  }
}


check_output_dir <- function(output.path)
{
  new.dir <- NULL

  if(!dir.exists(output.path))
  {
    stop(sprintf("%s: %s", "Output directory does not exist.",output.path))
  }
  else
  {
    # remove the trailing backslash from path
    output.path <- gsub('/+$', '' , output.path)

    new.dir <- paste(output.path,'comparing_GWASs',sep = '/')

    tryCatch({
      if (!dir.exists(new.dir))
      {
         dir.create(new.dir)
      }
      else
      {
        new.dir <- sprintf("%s_%s", new.dir, as.numeric(Sys.time()))
        dir.create(new.dir)
      }
    }, error = function(err)
    {
      stop(sprintf("%s: %s", 'Error creating output directory.', err$message))
    })
  }

  message(paste("Report directory:",new.dir))
  return(new.dir)
}

check_output_report_files <- function(output.path, filename)
{
  report.path = paste(output.path,filename,sep = '/')

  # if(file.exists(report.path))
  #   stop(paste('output file exists:',report.path),call. = FALSE)
  # else
    return(report.path)
}


write_to_report_file <- function(message,txt.report.path){

  write(message,
        file= txt.report.path,
        append=TRUE)

}


multi_file_txt_report_file <- function(study.list, txt.report.path)
{

  write_to_report_file('==================================================',txt.report.path)
  write_to_report_file('============== Quality Check Report ==============',txt.report.path)
  write_to_report_file('==================================================',txt.report.path)
  write_to_report_file(' ',txt.report.path)


  ## ======================================
  report.table <- data.table(sapply(study.list, function(x) return(x$file.name)))
  report.table <- cbind(seq(1:nrow(report.table)) , report.table)
  colnames(report.table) <- c('Number', 'Study Name')
  row.names(report.table) <- seq(1:nrow(report.table))

  write_to_report_file(kable(report.table,format = 'rst'),txt.report.path)


  write_to_report_file(' ',txt.report.path)
  write_to_report_file(' ',txt.report.path)
  write_to_report_file(' ',txt.report.path)

  ## ======================================
  report.table <- t(data.table(
    ## 'File Names' = sapply(study.list, function(x) return(x$file.name)),
    'Sample Size (Max)' = sapply(study.list, function(x) return(x$MAX_N_TOTAL)),
    'Missing Columns' = sapply(study.list, function(x) return(paste(x$missing.Columns, collapse = ' | '))),
    'SNPs in input file' = sapply(study.list, function(x) return(x$input.data.rowcount)),
    'Variant count after step 1 *' = sapply(study.list, function(x) return(x$rowcount.step1)),
    'Variant count after step 2 **' = sapply(study.list, function(x) return(x$rowcount.step2)),
    'Variant count after step 3 ***' = sapply(study.list, function(x) return(x$rowcount.step3)),
    'SNP variants' = sapply(study.list, function(x) return(calculatePercent(sum(as.numeric(gsub(x=x$tables$multi_allele_count_preProcess[1:3],
                                                                                                pattern = ",",
                                                                                                replacement = ""))),x$rowcount.step3,pretty=TRUE))),
    'Non-SNP variants' = sapply(study.list, function(x) return(calculatePercent(sum(as.numeric(gsub(x=x$tables$multi_allele_count_preProcess[4:6],
                                                                                                    pattern = ",",
                                                                                                    replacement = "")))
                                                                                ,x$rowcount.step3,
                                                                                pretty=TRUE))),
    'Monomorphic' = sapply(study.list, function(x) return(calculatePercent(x$monomorphic.count,x$rowcount.step3,pretty=TRUE))),
#/	'Duplicates' = sapply(study.list, function(x) return(calculatePercent(x$duplicate.count,x$rowcount.step3,pretty=TRUE))),
    'Palindromics' = sapply(study.list, function(x) return(calculatePercent(x$palindromic.rows,x$rowcount.step3,pretty=TRUE))),
    'Genotyped variants' = sapply(study.list, function(x) return(ifelse(x$tables$imputed.tbl[IMPUTED=='genotyped',.N] == 0,
                                                                        "NA",
                                                                        calculatePercent(as.numeric(x$tables$imputed.tbl[IMPUTED=="genotyped",N]),x$rowcount.step3,pretty=TRUE)))),
    'Imputed variants' = sapply(study.list, function(x) return(ifelse(x$tables$imputed.tbl[IMPUTED=='imputed',.N] == 0,
                                                                      "NA",
                                                                      calculatePercent(as.numeric(x$tables$imputed.tbl[IMPUTED=="imputed",N]),x$rowcount.step3,pretty=TRUE)))),
    'Negative-strand SNPs' = sapply(study.list, function(x) return(calculatePercent(x$neg.strand.count,x$rowcount.step3,pretty=TRUE))),
    'Allele Frequency Correlation (Standard Ref)' = sapply(study.list, function(x) return(x$AFcor.std_ref)),
    'Palindromic Allele Frequency correlation (Standard Ref)' = sapply(study.list, function(x) return(x$AFcor.palindromic.std_ref)),
    'Allele Frequency Correlation (Alternative Ref)' = sapply(study.list, function(x) return(x$AFcor.alt_ref)),
    'Palindromic Allele Frequency correlation (Alternative Ref)' = sapply(study.list, function(x) return(x$AFcor.palindromic.alt_ref)),
    'Lambda - Total' = sapply(study.list, function(x) return(x$lambda)),
    'Lambda - Genotyped' = sapply(study.list, function(x) return(x$lambda.gen)),
    'Lambda - Imputed' = sapply(study.list, function(x) return(x$lambda.imp)),
    'P-value Correlation' = sapply(study.list, function(x) return(x$PVcor)),
    "Visscher's Statistic (HQ variants)" = sapply(study.list, function(x) return(x$Visschers.stat.HQ)),
    "Fixed HWE P-value" = sapply(study.list, function(x) return(x$fixed.hwep)),
    "Fixed Imputation Quality" = sapply(study.list, function(x) return(x$fixed.impq)),
    "Fixed Sample Size" = sapply(study.list, function(x) return(x$fixed.n_total)),
    "Fixed Call Rate" = sapply(study.list, function(x) return(x$fixed.callrate)),
    "Effect size (All variants)" = " ",
    "-     Min." = sapply(study.list, function(x) return(x$tables$variable.summary['Min.', x$effect_type_string])),
    "-     1st Qu." = sapply(study.list, function(x) return(x$tables$variable.summary['1st Qu.',x$effect_type_string])),
    "-     Median" = sapply(study.list, function(x) return(x$tables$variable.summary['Median',x$effect_type_string])),
    "-     Mean" = sapply(study.list, function(x) return(x$tables$variable.summary['Mean',x$effect_type_string])),
    "-     3rd Qu." = sapply(study.list, function(x) return(x$tables$variable.summary['3rd Qu.',x$effect_type_string])),
    "-     Max." = sapply(study.list, function(x) return(x$tables$variable.summary['Max.',x$effect_type_string])),
    "-     Min.(HQ variants)" = sapply(study.list, function(x) return(ifelse(nrow(x$tables$variable.summary.HQ) > 0,
                                                                x$tables$variable.summary.HQ['Min.', x$effect_type_string],
                                                                NA))),
    "-     Max.(HQ variants)" = sapply(study.list, function(x) return(ifelse(nrow(x$tables$variable.summary.HQ) > 0,
                                                                x$tables$variable.summary.HQ['Max.',x$effect_type_string],
                                                                NA))),
    "Standard Error (median)" = sapply(study.list, function(x) return(x$tables$variable.summary['Median','STDERR'])),
    "Standard Error (median) (HQ variants)" = sapply(study.list, function(x) return(ifelse(nrow(x$tables$variable.summary.HQ) > 0,
                                                                                  x$tables$variable.summary.HQ['Median','STDERR'],
                                                                                  NA)))
  )
)

  colnames(report.table) <- seq(1:length(study.list))

  write_to_report_file(knitr::kable(report.table, align = "c" ,format = 'rst'),txt.report.path)


  write_to_report_file(' ',txt.report.path)
  write_to_report_file('* step1: removing variants with missing crucial values.',txt.report.path)
  write_to_report_file('** step2: removing monomorphic or duplicated variants, and specified chromosomes.',txt.report.path)
  write_to_report_file('*** step3: removing mismatched, ambiguous and multi-allelic variants that could not be verified.',txt.report.path)

  write_to_report_file('\n==============================================',txt.report.path)
  write_to_report_file(sprintf('Generated by GWASinspector package - v.%s', utils::packageVersion('GWASinspector')),txt.report.path)
  write_to_report_file(as.character(Sys.time()),txt.report.path)

  message(sprintf("Output file saved from %s input files." ,length(study.list)))
}

multi_file_html_report_file <- function(.QC, html.report.folder , html.report.file)
{
  input.file.list <- .QC$input.file.list

  if(!rmarkdown::pandoc_available())
  {
    message('pandoc module is required for converting report to Html format! check the manual on how to install.')
    return(NULL)
  }


  if(!is.element('kableExtra',installed.packages()))
  {
    message('kableExtra package is suggested for pretty Html format! check the manual on how to install.')
    report.template.file <- system.file("rmd", "multiFileReport.rmd", package = "GWASinspector")
  }
  else
  {
   # `%>%` <- NULL
   # `%>%` <<- kableExtra::`%>%`
    report.template.file <- system.file("rmd", "multiFileReport_alone_extra.rmd", package = "GWASinspector")
  }




    render.success <- tryCatch({
      # clear cache and RAM
      knitr::knit_meta(class=NULL, clean = TRUE)
      invisible(gc())

      render(report.template.file,
             output_dir = html.report.folder,
             output_file = html.report.file,
             quiet = TRUE)

      return(TRUE)

    },
    error=function(err){

      print_and_log(paste('---[ERROR saving main html file!---]\nThe result is also saved as txt and is in the output folder.',err$message),
                    'warning')

      return(FALSE)
    } )

    if(render.success)
      message(paste('HTML report file saved as ',report.output.path))


  }

Any scripts or data that you put into this service are public.

GWASinspector documentation built on April 4, 2025, 4:35 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

GWASinspector
Comprehensive and Easy to Use Quality Control of GWAS Results

R/compare_GWASs.R
In GWASinspector: Comprehensive and Easy to Use Quality Control of GWAS Results

Defines functions multi_file_html_report_file multi_file_txt_report_file write_to_report_file check_output_report_files check_output_dir check_each_GWAS_file check_if_files_exist_and_more_than_one create_comparison_plots compare_GWASs

Documented in compare_GWASs

Try the GWASinspector package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

GWASinspector Comprehensive and Easy to Use Quality Control of GWAS Results

R/compare_GWASs.R In GWASinspector: Comprehensive and Easy to Use Quality Control of GWAS Results

Defines functions multi_file_html_report_file multi_file_txt_report_file write_to_report_file check_output_report_files check_output_dir check_each_GWAS_file check_if_files_exist_and_more_than_one create_comparison_plots compare_GWASs

Documented in compare_GWASs

Try the GWASinspector package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

GWASinspector
Comprehensive and Easy to Use Quality Control of GWAS Results

R/compare_GWASs.R
In GWASinspector: Comprehensive and Easy to Use Quality Control of GWAS Results