In GWASinspector: Comprehensive and Easy to Use Quality Control of GWAS Results

# Embed the package logo (extdata/DNA.png shipped with GWASinspector) and pin it
# to the top of the page via absolute positioning.
htmltools::img(
  src = knitr::image_uri(
    system.file("extdata", "DNA.png", package = "GWASinspector")
  ),
  alt = 'logo',
  style = 'position:absolute; top:0; right:20%; padding:10px;'
)

QC Start Time: r format(.QC$thisStudy$starttime, "%b %d %Y - %X")

QC End Time: r format( .QC$thisStudy$endtime, "%b %d %Y - %X")

Script version: r .QC$script.version

Alternative header file: r basename(.QC$config$supplementaryFiles$header_translations)

Allele frequency reference dataset: r basename(.QC$config$supplementaryFiles$allele_ref_std)

`r # Name the alternative allele-frequency reference only when one is configured.
if (!is.na(.QC$config$supplementaryFiles$allele_ref_alt)) {
  sprintf(
    "Allele frequency alternative reference dataset:\t%s",
    basename(.QC$config$supplementaryFiles$allele_ref_alt)
  )
}

`r # Name the effect-size reference only when one is configured.
if (!is.na(.QC$config$supplementaryFiles$beta_ref_std)) {
  sprintf(
    "Effect-size reference dataset:\t%s",
    basename(.QC$config$supplementaryFiles$beta_ref_std)
  )
}

Filter values for selecting High-Quality (HQ) variants

`r # One-column table of the thresholds used to select High-Quality (HQ) variants.
# Every threshold is formatted without scientific notation.
count.table <- t(data.table(
  "Allele frequency" = format(.QC$config$filters$HQfilter_FRQ, scientific = FALSE),
  "HWE p-value" = format(.QC$config$filters$HQfilter_HWE, scientific = FALSE),
  "Call-rate" = format(.QC$config$filters$HQfilter_cal, scientific = FALSE),
  "Imputation quality" = format(.QC$config$filters$HQfilter_imp, scientific = FALSE)
))
colnames(count.table) <- 'Value'

# escape expects a logical; the string 'FALSE' only worked through implicit coercion.
kable(count.table, escape = FALSE, align = "c", format = "html") `

Input File Description

Input File: r basename(.QC$thisStudy$file.path)

`r # Table of variant counts through the QC steps. Apart from the raw input count,
# every row is produced by calculatePercent() against the input row count.
count.table <- t(data.table(
  "input variant count" = format(.QC$thisStudy$input.data.rowcount, big.mark = ",", scientific = FALSE),
  "Missing crucial variable" = calculatePercent(.QC$thisStudy$missing.crucial.rowcount, .QC$thisStudy$input.data.rowcount, pretty = TRUE),
  "Duplicated variants" = calculatePercent(.QC$thisStudy$duplicate.count, .QC$thisStudy$input.data.rowcount, pretty = TRUE),
  "variant count after step 1" = calculatePercent(.QC$thisStudy$rowcount.step1, .QC$thisStudy$input.data.rowcount, decimal.place = 3, pretty = TRUE),
  "Monomorphic variants" = calculatePercent(.QC$thisStudy$monomorphic.count, .QC$thisStudy$input.data.rowcount, pretty = TRUE)
))

colnames(count.table) <- 'Count'
# Capitalise the first and fourth row labels for display.
rownames(count.table)[1] <- "Input variant count"
rownames(count.table)[4] <- "Variant count after step 1"

# Add a row for X, Y, XY and M variants only when some were actually removed.
if (.QC$thisStudy$x.chr.count.removed > 0) {
  count.table <- rbind(count.table, t(data.table("Removed X variants" = calculatePercent(.QC$thisStudy$x.chr.count.removed, .QC$thisStudy$input.data.rowcount, pretty = TRUE))))
}
if (.QC$thisStudy$y.chr.count.removed > 0) {
  count.table <- rbind(count.table, t(data.table("Removed Y variants" = calculatePercent(.QC$thisStudy$y.chr.count.removed, .QC$thisStudy$input.data.rowcount, pretty = TRUE))))
}
if (.QC$thisStudy$xy.chr.count.removed > 0) {
  count.table <- rbind(count.table, t(data.table("Removed XY variants" = calculatePercent(.QC$thisStudy$xy.chr.count.removed, .QC$thisStudy$input.data.rowcount, pretty = TRUE))))
}
if (.QC$thisStudy$m.chr.count.removed > 0) {
  count.table <- rbind(count.table, t(data.table("Removed M variants" = calculatePercent(.QC$thisStudy$m.chr.count.removed, .QC$thisStudy$input.data.rowcount, pretty = TRUE))))
}

# Rows for steps 2 and 3 are built separately so they land after the optional rows above.
count.table2 <- t(data.table(
  "variant count after step 2" = calculatePercent(.QC$thisStudy$rowcount.step2, .QC$thisStudy$input.data.rowcount, decimal.place = 3, pretty = TRUE),
  "variant count after step 3" = calculatePercent(.QC$thisStudy$rowcount.step3, .QC$thisStudy$input.data.rowcount, decimal.place = 4, pretty = TRUE)
))
rownames(count.table2)[1] <- "Variant count after step 2 "
rownames(count.table2)[2] <- "Variant count after step 3 "

count.table <- rbind(count.table, count.table2)

# escape expects a logical; the string 'FALSE' only worked through implicit coercion.
kable(count.table, escape = FALSE, align = "l", format = "html")

Column names

`r # Stack the original input-file column names above the names they were renamed to.
column.tbl <- rbind(
  .QC$thisStudy$original.File.Columns.sorted,
  .QC$thisStudy$renamed.File.Columns.sorted
)
rownames(column.tbl) <- c('Original', 'Renamed')

kable(column.tbl, align = "c", format = "html") `

Header values are translated according to 'header_translations' file.

Missing columns = r paste(.QC$thisStudy$missing.Columns,collapse = ' | ')

Variant counts

`r # Table of variant categories. The quality and palindromic rows are measured against
# the step-3 row count; the allele-frequency rows are measured against the palindromic
# or non-palindromic row count respectively.
# NOTE(review): the palindormic spelling in the field names below is kept as-is because
# those fields are defined outside this template.
count.table <- t(data.table(
  "High Quality variants" = calculatePercent(.QC$thisStudy$HQ.count, .QC$thisStudy$rowcount.step3, pretty = TRUE),
  "Low Quality variants" = calculatePercent(.QC$thisStudy$LQ.count, .QC$thisStudy$rowcount.step3, pretty = TRUE),
  "Palindromic variants" = calculatePercent(.QC$thisStudy$palindromic.rows, .QC$thisStudy$rowcount.step3, pretty = TRUE),
  "Non-Palindromic variants" = calculatePercent(.QC$thisStudy$non.palindromic.rows, .QC$thisStudy$rowcount.step3, pretty = TRUE),
  "Palindromic variants with high allele frequency difference (> 0.15)" = calculatePercent(.QC$thisStudy$palindormicHighDiffEAF, .QC$thisStudy$palindromic.rows, pretty = TRUE),
  # This call previously lacked its closing parenthesis, which pulled the next row
  # into it as an extra argument and left the whole expression unbalanced.
  "Non-palindromic variants with high allele frequency difference (> 0.15)" = calculatePercent(.QC$thisStudy$nonpalindormicHighDiffEAF, .QC$thisStudy$non.palindromic.rows, pretty = TRUE),
  "Palindromic variants with opposite allele frequency \"compared to the reference\" (> 0.65 for the input file and < 0.35 for the reference, or vice versa)" = calculatePercent(.QC$thisStudy$palindormicExtremeDiffEAF, .QC$thisStudy$palindromic.rows, pretty = TRUE)
))

colnames(count.table) <- 'Count'

# escape expects a logical; the string 'FALSE' only worked through implicit coercion.
kable(count.table, escape = FALSE, align = "l", format = "html")

The results from matching variants with reference datasets

References used for variant matching

`r # Pass each column of match.ref.table through calculatePercent() against the
# step-2 row count and list the results next to the reference names.
match.table1 <- .QC$thisStudy$tables$match.ref.table

# Show a readable label in place of the internal Std_ref column name.
colnames(match.table1)[colnames(match.table1) == 'Std_ref'] <- 'Standard Reference'

match.table <- data.table(apply(match.table1, 2, function(x) {
  calculatePercent(x, .QC$thisStudy$rowcount.step2, pretty = TRUE, decimal.place = 3)
}))

match.table <- cbind(colnames(match.table1), match.table)
colnames(match.table) <- c('Reference', 'Count')

kable(match.table, escape = FALSE, align = "l", format = "html") `

Variant types

r if (nrow(.QC$thisStudy$tables$multi_allele_count_preProcess) > 1) {
  # Render the variant-type table only when it has more than one row.
  # escape expects a logical; the string 'FALSE' only worked through implicit coercion.
  kable(.QC$thisStudy$tables$multi_allele_count_preProcess, escape = FALSE, align = "l", format = "html")
}

Result of matching with standard reference dataset

Allele frequency reference dataset: r basename(.QC$config$supplementaryFiles$allele_ref_std)

`r # Matching results against the standard reference. Verified and not-found rows are
# measured against the step-2 row count; flipped and switched rows against the
# number of variants found in the standard reference.
count.table1 <- t(data.table(
  "Verified variants" = calculatePercent(.QC$thisStudy$found.rows.std, .QC$thisStudy$rowcount.step2, decimal.place = 3, pretty = TRUE),
  "Not-found variants" = calculatePercent(.QC$thisStudy$not.found.rows.std, .QC$thisStudy$rowcount.step2, decimal.place = 3, pretty = TRUE),
  "Flipped variants" = calculatePercent(.QC$thisStudy$flipped.rows.std, .QC$thisStudy$found.rows.std, pretty = TRUE),
  "Switched variants" = calculatePercent(.QC$thisStudy$switched.rows.std, .QC$thisStudy$found.rows.std, pretty = TRUE),
  # Empty-valued row that acts as a sub-heading for the correlation rows below.
  "Allele frequency correlation" = '',
  "r (all variants)" = .QC$thisStudy$AFcor.std_ref,
  "r (palindromic)" = .QC$thisStudy$AFcor.palindromic.std_ref,
  "r (non-palindromic)" = .QC$thisStudy$AFcor.non.palindromic.std_ref,
  "r (INDEL)" = .QC$thisStudy$AFcor.std_ref.indel
))

colnames(count.table1) <- 'Count'

# escape expects a logical; the string 'FALSE' only worked through implicit coercion.
kable(count.table1, escape = FALSE, align = "c", format = "html") `

r if (file.exists(.QC$thisStudy$stdMafPlotPath) && .QC$graphic.device != 'tiff') {
  # Shown only when the plot file exists and the graphic device is not tiff.
  # Heading and image are returned together as one tag list: an expression yields
  # only its last value, so a bare paste() placed before img() would be dropped.
  htmltools::tagList(
    htmltools::HTML(' <h4 class="header2">Standard reference allele-frequency comparison plot</h4>'),
    htmltools::img(
      src = knitr::image_uri(.QC$thisStudy$stdMafPlotPath),
      alt = 'MAF',
      style = 'width:100%'
    )
  )
}

r if (!is.na(.QC$config$supplementaryFiles$allele_ref_alt)) paste('<h4 class="header2">Result of matching with alternative reference dataset</h4>')

`r if (!is.na(.QC$config$supplementaryFiles$allele_ref_alt)) {
  # Matching results against the alternative reference, built only when one is configured.
  # Verified and not-found rows are measured against the variants NOT found in the
  # standard reference; flipped and switched rows against those found in the alternative.
  count.table2 <- t(data.table(
    "Verified variants" = calculatePercent(.QC$thisStudy$found.rows.alt, .QC$thisStudy$not.found.rows.std, decimal.place = 3, pretty = TRUE),
    "Not-found variants" = calculatePercent(.QC$thisStudy$not.found.rows.alt, .QC$thisStudy$not.found.rows.std, decimal.place = 3, pretty = TRUE),
    "Flipped variants" = calculatePercent(.QC$thisStudy$flipped.rows.alt, .QC$thisStudy$found.rows.alt, pretty = TRUE),
    "Switched variants" = calculatePercent(.QC$thisStudy$switched.rows.alt, .QC$thisStudy$found.rows.alt, pretty = TRUE),
    # Empty-valued row that acts as a sub-heading for the correlation rows below.
    "Allele frequency correlation" = '',
    "r (all variants)" = .QC$thisStudy$AFcor.alt_ref,
    "r (palindromic)" = .QC$thisStudy$AFcor.palindromic.alt_ref,
    "r (non-palindromic)" = .QC$thisStudy$AFcor.non.palindromic.alt_ref
  ))

  colnames(count.table2) <- 'Count'

  # escape expects a logical; the string 'FALSE' only worked through implicit coercion.
  kable(count.table2, escape = FALSE, align = "c", format = "html")
} `

r if (file.exists(.QC$thisStudy$altMafPlotPath) && .QC$graphic.device != 'tiff') {
  # Shown only when the plot file exists and the graphic device is not tiff.
  # The heading used to read "Standard reference ...", copied from the standard-reference
  # plot; this block embeds the alternative-reference plot (altMafPlotPath).
  # Heading and image are returned together so that both are rendered.
  htmltools::tagList(
    htmltools::HTML(' <h4 class="header2">Alternative reference allele-frequency comparison plot</h4>'),
    htmltools::img(
      src = knitr::image_uri(.QC$thisStudy$altMafPlotPath),
      alt = 'MAF',
      style = 'width:100%'
    )
  )
}

QC summary statistics

P-value correlation (observed vs expected)

Note: Only variants with a valid P-value are used for P-value correlation calculation.

`r # P-value correlation summary: the variants included in the calculation (measured
# against the step-3 row count) and the stored correlation value.
count.table1 <- t(data.table(
  "Included variants" = calculatePercent(.QC$thisStudy$rownum.PVcor, .QC$thisStudy$rowcount.step3, pretty = TRUE),
  "r (all variants)" = .QC$thisStudy$PVcor
))

colnames(count.table1) <- 'Value'

# escape expects a logical; the string 'FALSE' only worked through implicit coercion.
kable(count.table1, escape = FALSE, align = "c", format = "html") `

`r if (file.exists(.QC$thisStudy$pvalCorPlotPath) && .QC$graphic.device != 'tiff') {
  # Shown only when the plot file exists and the graphic device is not tiff.
  # The heading string was left unterminated, so it is rebuilt here using the same
  # h4/header2 markup as the other plot headings in this report.
  # NOTE(review): the heading markup is reconstructed from sibling blocks -- confirm.
  htmltools::tagList(
    htmltools::HTML('<h4 class="header2">P-value correlation plot</h4>'),
    htmltools::img(
      src = knitr::image_uri(.QC$thisStudy$pvalCorPlotPath),
      alt = 'P-value correlation',
      style = 'width:50%'
    )
  )
} `

Distribution statistics

`r # One-column table of the distribution statistics stored on the study object.
count.table2 <- t(data.table(
  "Skewness" = .QC$thisStudy$skewness,
  "Skewness (HQ)" = .QC$thisStudy$skewness.HQ,
  "Kurtosis" = .QC$thisStudy$kurtosis,
  "Kurtosis (HQ)" = .QC$thisStudy$kurtosis.HQ,
  "Visscher's stat" = .QC$thisStudy$Visschers.stat,
  "Visscher's stat (HQ)" = .QC$thisStudy$Visschers.stat.HQ,
  "Lambda - total" = .QC$thisStudy$lambda,
  "Lambda - genotyped" = .QC$thisStudy$lambda.gen,
  "Lambda - imputed" = .QC$thisStudy$lambda.imp,
  "Sample Size (max)" = .QC$thisStudy$MAX_N_TOTAL,
  "Fixed HWE P-value" = .QC$thisStudy$fixed.hwep,
  "Fixed Imputation Quality" = .QC$thisStudy$fixed.impq,
  "Fixed Call Rate" = .QC$thisStudy$fixed.callrate,
  "Fixed Sample Size" = .QC$thisStudy$fixed.n_total
))

colnames(count.table2) <- 'Value'

# escape expects a logical; the string 'FALSE' only worked through implicit coercion.
kable(count.table2, escape = FALSE, align = "c", format = "html")

r kable(t(.QC$thisStudy$tables$variable.summary), format = "html")

Variable statistics

`r b <- t(data.frame('CHR' = c(abs(study$column.NA.list$CHR - study$column.INVALID.list$CHR) , study$column.INVALID.list$CHR, ' '),
                # Each named entry is one input column, given as three values that end up
                # under 'NA values', 'Invalid values' and 'Uncertain values' after t().
                #   1st: abs(NA-list value - INVALID-list value [- special-value entry])
                #   2nd: the INVALID-list value
                #   3rd: the special-value entry (zero.* or minusone.*), or ' ' when none exists
                # NOTE(review): the list values are presumably counts -- confirm.
                # NOTE(review): this chunk reads from study while the rest of the report
                # reads .QC$thisStudy; presumably study is an alias defined earlier -- confirm.

                'POSITION' = c(abs(study$column.NA.list$POSITION - study$column.INVALID.list$POSITION) ,
                               study$column.INVALID.list$POSITION,
                               ' '),

                'EFFECT_ALL' = c(abs(study$column.NA.list$EFFECT_ALL - study$column.INVALID.list$EFFECT_ALL) ,
                                 study$column.INVALID.list$EFFECT_ALL,
                                 ' '),

                'OTHER_ALL' = c(abs(study$column.NA.list$OTHER_ALL - study$column.INVALID.list$OTHER_ALL) ,
                                study$column.INVALID.list$OTHER_ALL,
                                ' '),

                'EFFECT' = c(abs(study$column.NA.list$EFFECT - study$column.INVALID.list$EFFECT) ,
                             study$column.INVALID.list$EFFECT,
                             ' '),

                # STDERR reports its zero.STDERR entry in the third position.
                'STDERR' = c(abs(study$column.NA.list$STDERR - study$column.INVALID.list$STDERR - study$column.INVALID.list$zero.STDERR) ,
                             study$column.INVALID.list$STDERR,
                             study$column.INVALID.list$zero.STDERR),

                # EFF_ALL_FREQ, HWE_PVAL and PVALUE report their minusone.* entries in the third position.
                'EFF_ALL_FREQ' = c(abs(study$column.NA.list$EFF_ALL_FREQ - study$column.INVALID.list$EFF_ALL_FREQ - study$column.INVALID.list$minusone.EFF_ALL_FREQ),
                                   study$column.INVALID.list$EFF_ALL_FREQ,
                                   study$column.INVALID.list$minusone.EFF_ALL_FREQ),

                'HWE_PVAL' = c(abs(study$column.NA.list$HWE_PVAL - study$column.INVALID.list$HWE_PVAL - study$column.INVALID.list$minusone.HWE_PVAL) ,
                               study$column.INVALID.list$HWE_PVAL,
                               study$column.INVALID.list$minusone.HWE_PVAL),

                'PVALUE' = c(abs(study$column.NA.list$PVALUE - study$column.INVALID.list$PVALUE - study$column.INVALID.list$minusone.PVALUE) ,
                             study$column.INVALID.list$PVALUE,
                             study$column.INVALID.list$minusone.PVALUE),

                'IMPUTED' = c(abs(study$column.NA.list$IMPUTED - study$column.INVALID.list$IMPUTED),
                              study$column.INVALID.list$IMPUTED,
                              ' '),

                'IMP_QUALITY' = c(abs(study$column.NA.list$IMP_QUALITY - study$column.INVALID.list$IMP_QUALITY) ,
                                  study$column.INVALID.list$IMP_QUALITY,
                                  ' '),

                'MARKER' = c(abs(study$column.NA.list$MARKER - study$column.INVALID.list$MARKER) ,
                             study$column.INVALID.list$MARKER,
                             ' '),

                'N_TOTAL' = c(abs(study$column.NA.list$N_TOTAL - study$column.INVALID.list$N_TOTAL) ,
                              study$column.INVALID.list$N_TOTAL,
                              ' '),

                'STRAND' = c(abs(study$column.NA.list$STRAND - study$column.INVALID.list$STRAND) ,
                             study$column.INVALID.list$STRAND,
                             ' '),

                # CALLRATE also reports a minusone.* entry in the third position.
                'CALLRATE' = c(abs(study$column.NA.list$CALLRATE - study$column.INVALID.list$CALLRATE - study$column.INVALID.list$minusone.CALLRATE),
                               study$column.INVALID.list$CALLRATE ,
                               study$column.INVALID.list$minusone.CALLRATE)

))

# After t(), each row is one input column; label the three value positions.
colnames(b) <- c('NA values','Invalid values','Uncertain values')

# Render as a centred HTML table without escaping cell contents.
kable(b, align = "c", escape= FALSE ,format = "html")

Chromosome

Variant count for each chromosome: `r # Show the per-chromosome counts, or a notice when no table is available.
# is.na() on a whole table returns one value per cell, which is not a valid scalar
# condition for if (an error in current R). Only a NULL or a single NA is treated
# as "missing"; anything else is taken to be the table.
# NOTE(review): assumes a scalar NA marks a missing CHR table -- confirm.
# NOTE(review): the check used to read study$tables$CHR.tbl while the body read
# .QC$thisStudy$tables$CHR.tbl; both now read .QC$thisStudy -- confirm they are the same object.
chr_tbl <- .QC$thisStudy$tables$CHR.tbl
chr_missing <- is.null(chr_tbl) ||
  (is.atomic(chr_tbl) && length(chr_tbl) == 1L && is.na(chr_tbl))

if (!chr_missing) {
  tbl <- t(chr_tbl)
  rownames(tbl) <- c('Chromosome', 'Count')
  kable(tbl, align = "c", format = "html")
} else {
  paste('CHR column not found!')
} `

Effect Allele

Allele distribution: `r # Join the effect-allele table from the input file with the post-matching one.
# all = TRUE keeps alleles that appear in only one of the two tables.
tbl <- merge(
  .QC$thisStudy$tables$EFFECT_ALL.tbl,
  .QC$thisStudy$tables$EFFECT_ALL.post.matching.tbl,
  by = "EFFECT_ALL",
  all = TRUE
)
# Transpose so that each allele becomes a column.
tbl <- t(tbl)

rownames(tbl) <- c('Allele', 'Count (input file)', 'Count (post-matching)')

kable(tbl, align = "c", format = "html") `

Other Allele

Allele distribution: `r # Join the other-allele table from the input file with the post-matching one.
# all = TRUE keeps alleles that appear in only one of the two tables.
tbl <- merge(
  .QC$thisStudy$tables$OTHER_ALL.tbl,
  .QC$thisStudy$tables$OTHER_ALL.post.matching.tbl,
  by = "OTHER_ALL",
  all = TRUE
)
# Transpose so that each allele becomes a column.
tbl <- t(tbl)

rownames(tbl) <- c('Allele', 'Count (input file)', 'Count (post-matching)')

kable(tbl, align = "c", format = "html") `

Strand

Negative strand variants = r .QC$thisStudy$neg.strand.count

Imputation status

`r # Render the imputation-status table: the first column gets a blank header,
# the second is labelled Count.
tbl <- .QC$thisStudy$tables$imputed.tbl
colnames(tbl) <- c('', 'Count')
kable(tbl, align = "c", format = "html")

POSITION

Invalid values (< 0)

EFFECT

Invalid values ( = -1)

STDERR

Uncertain values ( = 0)
Invalid values ( < 0)

PVALUE

Uncertain values ( = -1)
Invalid values ( > 1 or <= 0)

Allele frequency

Allele frequency = 0: r .QC$thisStudy$column.INVALID.list$zero.EFF_ALL_FREQ
Allele frequency = 1: r .QC$thisStudy$column.INVALID.list$one.EFF_ALL_FREQ
Uncertain values ( = -1)
Invalid values ( < 0 or > 1)

HWE_PVAL

Uncertain values ( = -1)
Invalid values ( > 1 or <= 0)

IMP_QUALITY

valid range for Imputation Quality is between r .QC$config$filters$minimal_impQ_value and r .QC$config$filters$maximal_impQ_value

CALLRATE

Uncertain values (CALLRATE = -1)
Invalid values (> 1 or < 0)

N_TOTAL

Invalid values (<= 0)

r if (!is.na(.QC$config$supplementaryFiles$beta_ref_std)) {
  # Section heading, emitted only when an effect-size reference is configured.
  paste('<h3 class="header1">Effect-size correlation</h3>')
}

`r if (!is.na(.QC$config$supplementaryFiles$beta_ref_std)) {
  # Correlation value shown as a bullet item under the same condition.
  paste('* r =', .QC$thisStudy$effect.rho_4)
}

`r if (file.exists(.QC$thisStudy$effPlotPath) && .QC$graphic.device != 'tiff') {
  # Embed the effect-size plot only when the file exists and the graphic device is not tiff.
  # The alt text used to be 'MAF', copied from the allele-frequency plots.
  htmltools::img(
    src = knitr::image_uri(.QC$thisStudy$effPlotPath),
    alt = 'Effect-size correlation',
    style = 'width:50%'
  )
}

r if (file.exists(.QC$thisStudy$histPlotPath) && .QC$graphic.device != 'tiff') paste('<h3 class="header1">Plots</h3>')

r if (file.exists(.QC$thisStudy$histPlotPath) && .QC$graphic.device != 'tiff') {
  # Each plot below is embedded only when its file exists and the device is not tiff.
  # Alt texts used to be 'MAF' throughout, copied from the allele-frequency plots.
  htmltools::img(src = knitr::image_uri(.QC$thisStudy$histPlotPath), alt = 'Histogram plot', style = 'width:100%')
}

`r if (file.exists(.QC$thisStudy$QQPlotPath) && .QC$graphic.device != 'tiff') {
  htmltools::img(src = knitr::image_uri(.QC$thisStudy$QQPlotPath), alt = 'QQ plot', style = 'width:100%')
}

`r if (file.exists(.QC$thisStudy$manPlotPath) && .QC$graphic.device != 'tiff') {
  htmltools::img(src = knitr::image_uri(.QC$thisStudy$manPlotPath), alt = 'Manhattan plot', style = 'width:100%')
}

Any scripts or data that you put into this service are public.

GWASinspector documentation built on April 4, 2025, 4:35 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

GWASinspector
Comprehensive and Easy to Use Quality Control of GWAS Results

In GWASinspector: Comprehensive and Easy to Use Quality Control of GWAS Results

Filter values for selecting High-Quality (HQ) variants

Input File Description

Column names

Variant counts

The results from matching variants with reference datasets

References used for variant matching

Variant types

Result of matching with standard reference dataset

QC summary statistics

P-value correlation (observed vs expected)

P-value correlation plot

Distribution statistics

Variable statistics

Chromosome

Effect Allele

Other Allele

Strand

Imputation status

POSITION

EFFECT

STDERR

PVALUE

Allele frequency

HWE_PVAL

IMP_QUALITY

CALLRATE

N_TOTAL

Try the GWASinspector package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

GWASinspector Comprehensive and Easy to Use Quality Control of GWAS Results

In GWASinspector: Comprehensive and Easy to Use Quality Control of GWAS Results

Filter values for selecting High-Quality (HQ) variants

Input File Description

Column names

Variant counts

The results from matching variants with reference datasets

References used for variant matching

Variant types

Result of matching with standard reference dataset

QC summary statistics

P-value correlation (observed vs expected)

P-value correlation plot

Distribution statistics

Variable statistics

Chromosome

Effect Allele

Other Allele

Strand

Imputation status

POSITION

EFFECT

STDERR

PVALUE

Allele frequency

HWE_PVAL

IMP_QUALITY

CALLRATE

N_TOTAL

Try the GWASinspector package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

GWASinspector
Comprehensive and Easy to Use Quality Control of GWAS Results