inst/doc/GALLO.R

## ----echo=T, results='asis'---------------------------------------------------
#Installation
#devtools::install_github("pablobio/GALLO")

#Loading the package
library(GALLO)

## ----echo=T, results='asis'---------------------------------------------------
#Importing QTL markers from example dataset
data("QTLmarkers")

DT::datatable(QTLmarkers, rownames = FALSE, extensions = 'FixedColumns',
  options = list(scrollX = TRUE))

dim(QTLmarkers)

#Importing QTL windows from example dataset
data("QTLwindows")
DT::datatable(QTLwindows, rownames = FALSE, extensions = 'FixedColumns',
  options = list(scrollX = TRUE))

dim(QTLwindows)

## ----echo=T, results='asis'---------------------------------------------------
#Importing QTL annotation database
data("gffQTLs")

#Printing the first 100 rows
DT::datatable(gffQTLs[1:100,], rownames = FALSE, extensions = 'FixedColumns',
  options = list(scrollX = TRUE))

dim(gffQTLs)

#Importing gene annotation database
data("gtfGenes")

#Printing the first 100 rows
DT::datatable(gtfGenes[1:100,], rownames = FALSE, extensions = 'FixedColumns',
  options = list(scrollX = TRUE))

dim(gtfGenes)


## ----echo=T, results='asis'---------------------------------------------------
#An example of how to import a QTL annotation file
#qtl.inp <- import_gff_gtf(db_file="QTL_db.gff",file_type="gff")

#An example of how to import a gene annotation file
#qtf.inp <- import_gff_gtf(db_file="Gene_db.gtf",file_type="gtf")

## ----echo=T, results='asis'---------------------------------------------------

#Running gene annotation
out.genes<-find_genes_qtls_around_markers(db_file=gtfGenes,
marker_file=QTLmarkers, method = "gene", 
marker = "snp", interval = 500000, nThreads = 1)

#Checking the first rows from the output file
DT::datatable(out.genes, rownames = FALSE, 
  extensions = 'FixedColumns',
  options = list(scrollX = TRUE))

#Checking the dimensions of the output file
dim(out.genes)

## ----echo=T, results='asis'---------------------------------------------------
#Running QTL annotation
out.qtls<-find_genes_qtls_around_markers(db_file=gffQTLs,
marker_file=QTLmarkers, method = "qtl",
marker = "snp", interval = 500000, nThreads = 1)

#Checking the first rows from the output file
DT::datatable(out.qtls, rownames = FALSE, 
  extensions = 'FixedColumns',
  options = list(scrollX = TRUE))

#Checking the dimensions of the output file
dim(out.qtls)

## ----echo=T-------------------------------------------------------------------
#Removing the duplicated records for Gene ID
out.genes.unique<-out.genes[!duplicated(out.genes[c("Reference","gene_id")]),]
#Calculating the number of shared genes between different studies
overlap.genes<-overlapping_among_groups(file=out.genes.unique, x="Reference", y="gene_id")

overlap.genes

## ----echo=T-------------------------------------------------------------------
#Creating grouping labels (names of the references)
group.labels<-unique(out.genes.unique$Reference)

## ---- fig.align="center", fig.width=5, fig.height=5, fig.cap="Figure 1: Gene overlapping among studies."----
#plotting the overlapping information
plot_overlapping(overlap.genes, nmatrix=2, ntext=3, group=group.labels, labelcex = 1)


## ----echo=T, fig.height = 10, fig.width = 8, fig.align = "center"-------------
#plotting the percentage of each QTL class annoatted
oldpar <- par(mar=c(0.5,15,0.5,1))
plot_qtl_info(out.qtls, qtl_plot = "qtl_type", cex=1.5)
par(oldpar)

## ---- fig.width=8, fig.height=6, fig.cap="Figure 2: Percentage of each trait annotated as a Reproduction QTL in the candidate intervals."----
#Setting margin parameter to better fit the axis labels
oldpar<-par(mar=c(5,20,1,1))
#plotting the percentage of each trait annoatted as a Reproduction QTL
plot_qtl_info(out.qtls, qtl_plot = "qtl_name", qtl_class="Reproduction")
par(oldpar)

## ---- results='hide', eval=F--------------------------------------------------
#  #Plotting percentage of the top 10 most frequent traits in all QTL classes
#  #(This is just an example code, the user do not need to execute
#  #this command for this tutorial)
#  QTL_classes<-unique(out.qtls$QTL_type)
#  
#  for(c in QTL_classes){
#    tmp.file.name<-paste(c,".png",sep="")
#    png(tmp.file.name,w=1500,h=900)
#    plot_qtl_info(out.qtls, qtl_plot = "qtl_name", qtl_class=c, n=10)
#    dev.off()
#  }
#  

## ----echo=T, results='asis'---------------------------------------------------
#QTL enrichment analysis 
out.enrich<-qtl_enrich(qtl_db=gffQTLs,
                       qtl_file=out.qtls, 
                       qtl_type = "Name",
                       enrich_type = "chromosome",
                       chr.subset = NULL, 
                       padj ="fdr",nThreads = 1)

#Checking the enriched QTLs
DT::datatable(out.enrich[order(out.enrich$pvalue),], 
              rownames = FALSE,
              extensions = 'FixedColumns',
              options = list(scrollX = TRUE))

## ---- echo=T------------------------------------------------------------------
#Creating a new ID composed by the trait and the chromosome
out.enrich$ID<-paste(out.enrich$QTL," - ","CHR",out.enrich$CHR,sep="")

#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered<-out.enrich[which(out.enrich$adj.pval<0.05),]

#Here we are going to truncate the FDR values at -log10(5^-50) in order to provide a better visualization

out.enrich.filtered$new_pval<-out.enrich.filtered$adj.pval

out.enrich.filtered[which(out.enrich.filtered$new_pval<(5^-50)),"new_pval"]<-(5^-50)

## ---- fig.width=10, fig.height=8, fig.cap="Figure 3: Top 10 enriched traits identified in the QTL enrichment analysis. The area of the bubbles represents the number of observed QTLs for that class, while the color represents the p-value scale (the darker the color, smaller the p-value). Additionally, the x-axis shows the richness factor for each QTL, representing the ratio of number of QTLs and the expected number of that QTL."----
#Plotting the enrichment results for the QTL enrichment analysis
QTLenrich_plot(out.enrich.filtered, x="ID", pval="new_pval")

## ---- fig.width=7, fig.height=7, fig.cap="Figure 4: Chord plot showing the relationship between the positional candidate markers identified in the GWAS (right-hand side) and the enriched QTLs (abbreviations in the left -hand side). The colors correspond to each study, where markers identified by Feugang et al. (2010) are shown in purple, while the markers identified by Buzanskas et al. (2017) are shown in pink."----

#Filtering the output from QTL annotation

#Creating a new ID to filter the top 10 enriched QTLs
out.qtls$ID<-paste(out.qtls$Name," - ","CHR",out.qtls$CHR,sep="")

out.enrich.filtered<-out.enrich.filtered[
order(out.enrich.filtered$adj.pval),]

out.qtls.filtered<-out.qtls[
which(out.qtls$ID%in%out.enrich.filtered$ID[1:10]),]

#Creating color scheme based on the References
out.qtls.filtered[which(
out.qtls.filtered$Reference=="Feugang et al. (2010)"),
"color_ref"]<-"purple"

out.qtls.filtered[which(
out.qtls.filtered$Reference== "Buzanskas et al. (2017)"),
"color_ref"]<-"pink"

#Creating a color vector filled with black for all the traits abbreviation
#and with the respective colors for each reference
color.grid<-c(rep("black",
length(unique(out.qtls.filtered$Abbrev))),
out.qtls.filtered[!duplicated(out.qtls.filtered$SNP.reference),"color_ref"])

#Naming the vector
names(color.grid)<-c(unique(
out.qtls.filtered$Abbrev),unique(
out.qtls.filtered$SNP.reference))

#Plotting the relationship plot using the grid color created above
relationship_plot(qtl_file=out.qtls.filtered, x="Abbrev", y="SNP.reference", cex=1,gap=2.5,degree = 60, canvas.xlim = c(-1.5, 1.5),canvas.ylim = c(-1.5, 1.5),grid.col = color.grid)

Try the GALLO package in your browser

Any scripts or data that you put into this service are public.

GALLO documentation built on Nov. 10, 2021, 1:07 a.m.