Introduction_of_geneHapR.R
In geneHapR: Gene Haplotype Statistics, Phenotype Association and Visualization

## ----Out Put Format1, echo=FALSE, fig.align='center', fig.cap="Cartoon representation of hapResult and hapSummary contents", fig.height=4, fig.width=6----
plot(c(0,5),c(0,5), axes = FALSE, type = "n", xlab="", ylab ="", frame.plot = F)
rect(xleft=0, ybottom=0, xright=0.5, ytop=4.5)
rect(xleft=0.5, ybottom=3.5, xright=4, ytop=4.5)
rect(xleft=0.5, ybottom=0, xright=4, ytop=3.5)
rect(xleft=4, ybottom=0, xright=5, ytop=3.5)
text(0.25, 4.75, "Part I", cex=1)
text(2.25, 4.75, "Part II", cex=1)
text(4.5, 4.75, "Part III", cex=1)
text(0.25, 3, "Lead column", cex=1, srt = 270)
text(2.25, 4.15, "Sites information", cex=1)
text(2.25, 3.85, "(CHROM, POS, INFO, ALLELE)", cex=0.8)
text(2.25, 2, "Genotypes", cex=1)
text(4.5, 2, "Accessions (freq)", cex=1, srt= 270)

## ----include=FALSE------------------------------------------------------------
NOT_CRAN <- identical(tolower(Sys.getenv("NOT_CRAN")), "true")
knitr::opts_chunk$set(purl = NOT_CRAN)

## ----eval=FALSE---------------------------------------------------------------
#  install.packages("geneHapR")

## ----startup, eval=NOT_CRAN, echo=TRUE, message=FALSE, warning=FALSE----------
library(geneHapR)

## ----setups, eval=NOT_CRAN, include=FALSE-------------------------------------
data("geneHapR_test")

## ----import vcf, eval=FALSE, include=TRUE-------------------------------------
#  # import vcf file
#  vcf <- import_vcf("your_vcf_file_path.vcf")
#  
#  # import gziped vcf file
#  vcf <- import_vcf("your_vcf_file_path.vcf.gz")

## ----import p.link, eval=FALSE, include=TRUE----------------------------------
#  plink <- import_plink.pedmap(mapfile = "p_link.map", pedfile = "p_link.ped",
#                               sep_ped = "\t", sep_map = "\t")
#  plink <- import_plink.pedmap(root = "p_link", sep_ped = "\t", sep_map = "\t")

## ----import gff, eval=FALSE, include=TRUE-------------------------------------
#  # import GFFs
#  gff <- import_gff("your_gff_file_path.gff", format = "GFF")

## ----import bed, eval=FALSE, include=TRUE-------------------------------------
#  # import GFFs
#  bed <- import_bed("your_gff_file_path.bed")

## ----import DNA seqs, eval=FALSE, include=TRUE--------------------------------
#  # import DNA sequences in fasta format
#  seqs <- import_seqs("your_DNA_seq_file_path.fa", format = "fasta")

## ----import phenotype and accession groups, eval=FALSE, include=TRUE----------
#  # import phynotype data
#  pheno <- import_AccINFO("your_pheno_file_path.txt")
#  pheno

## ----eval=NOT_CRAN, echo=FALSE------------------------------------------------
head(pheno)

## ----import pheno, eval=FALSE-------------------------------------------------
#  # import accession group/location information
#  AccINFO <- import_AccINFO("accession_group_file_path.txt")

## ----eval=NOT_CRAN, echo=FALSE------------------------------------------------
head(AccINFO)

## ----import pheno and accessions_groups, eval=FALSE, include=TRUE-------------
#  # import pheno from space ' ' delimed table
#  pheno <- read.table("your_pheno_file_path.csv", header = TRUE, row.names = 1, comment.char = "#")
#  
#  # import pheno from ',' delimed table
#  pheno <- read.csv("your_pheno_file_path.csv", header = TRUE, comment.char = "#")

## ----VCF filtration, eval=FALSE, include=TRUE---------------------------------
#  # filter VCF by position
#  vcf_f1 <- filter_vcf(vcf, mode = "POS",
#                       Chr = "scaffold_1",
#                       start = 4300, end = 5890)
#  
#  # filter VCF by annotation
#  vcf_f2 <- filter_vcf(vcf, mode = "type",
#                       gff = gff,
#                       type = "CDS")
#  
#  # filter VCF by position and annotation
#  vcf_f3 <- filter_vcf(vcf, mode = "both",
#                       Chr = "scaffold_1",
#                       start = 4300, end = 5890,
#                       gff = gff,
#                       type = "CDS")

## ----preprocess large VCF, eval=FALSE, include=TRUE---------------------------
#  # new VCF file will be saved to disk
#  # extract a single gene/range from a large vcf
#  
#  filterLargeVCF(VCFin = "Ori.vcf.gz",
#                 VCFout = "filtered.vcf.gz",
#                 Chr = "scaffold_8",
#                 POS = c(19802,24501),
#                 override = TRUE)
#  
#  # extract multi genes/ranges from large vcf
#  filterLargeVCF(VCFin = "Ori.vcf.gz",          # surfix should be .vcf.gz or .vcf
#                 VCFout = c("filtered1.vcf.gz", # surfix should be .vcf.gz or .vcf
#                            "filtered2.vcf.gz",
#                            "filtered3.vcf.gz"),
#                 Chr = c("scaffold_8",
#                         "scaffold_8",
#                         "scaffold_7"),
#                 POS = list(c(19802,24501),
#                            c(27341,28949),
#                            c(38469,40344)),
#                 override = TRUE)               # if TRUE, existed file will be override without warning

## ----p.link filtration, eval=FALSE, message=FALSE, warning=FALSE, include=TRUE----
#  p.link <- filter_plink.pedmap(p.link, mode = "POS",
#                                Chr = "Chr08", start = 25947258, end = 25948258)

## ----DNA alignment and trim, eval=FALSE, message=FALSE, warning=FALSE, include=TRUE----
#  # sequences alignment
#  seqs <- allignSeqs(seqs, quiet = TRUE)
#  
#  # sequences trim
#  seqs <- trimSeqs(seqs, minFlankFraction = 0.1)
#  seqs

## ----hap filtration, eval=FALSE, message=FALSE, warning=FALSE, include=TRUE----
#  hap <- filter_hap(hapSummary,
#                    rm.mode = c("position", "accession", "haplotype", "freq"),
#                    position.rm = c(4879, 4950),
#                    accession.rm = c("C1", "C9"),
#                    haplotype.rm = c("H009", "H008"),
#                    freq.min = 5)

## ----haplotype calculation from vcf, eval=NOT_CRAN, include=TRUE, message=FALSE, warning=FALSE----
hapResult <- vcf2hap(vcf,
                     hapPrefix = "H",
                     hetero_remove = TRUE,
                     na_drop = TRUE)
hapResult

## ----haplotype calculation from seqs, eval=FALSE------------------------------
#  hapResult <- seqs2hap(seqs,
#                        Ref = names(seqs)[1],
#                        hapPrefix = "H",
#                        hetero_remove = TRUE,
#                        na_drop = TRUE,
#                        maxGapsPerSeq = 0.25)

## ----check site numbers, eval=NOT_CRAN----------------------------------------
# Chech number of sites conclude in hapResult
sites(hapResult)

## ----replace INFO, eval=NOT_CRAN, include=TRUE, message=FALSE, warning=FALSE----
# add annotations to INFO field
hapResult <- addINFO(hapResult,
                     tag = "PrChange",
                     values = rep(c("C->D", "V->R", "G->N"),3),
                     replace = TRUE)

## ----add INFO, eval=NOT_CRAN, include=TRUE, message=FALSE, warning=FALSE------
# To replace the origin INFO by set 'replace' as TRUE
hapResult <- addINFO(hapResult,
                     tag = "CDSChange",
                     values = rep(c("C->A", "T->C", "G->T"),3),
                     replace = FALSE)

## ----ATG position, eval=FALSE-------------------------------------------------
#  # set ATG position as zero in gff
#  newgff <- gffSetATGas0(gff = gff, hap = hapResult,
#                         geneID = "test1G0387",
#                         Chr = "scaffold_1",
#                         POS = c(4300, 7910))
#  
#  # set position of ATG as zero in hapResult/hapSummary
#  newhap <- hapSetATGas0(gff = gff, hap = hapResult,
#                         geneID = "test1G0387",
#                         Chr = "scaffold_1",
#                         POS = c(4300, 7910))

## ----hapSummary, eval=NOT_CRAN, fig.height=4, fig.width=6, message=FALSE, warning=FALSE, paged.print=FALSE----
hapSummary <- hap_summary(hapResult)
hapSummary

## ----eval=NOT_CRAN, fig.height=4, fig.width=6, message=FALSE, warning=FALSE, paged.print=FALSE----
plotHapTable(hapSummary)

## ----eval=NOT_CRAN, fig.height=4, fig.width=6, message=FALSE, warning=FALSE, paged.print=FALSE----
# add one annotation
plotHapTable(hapSummary,
             hapPrefix = "H",
             INFO_tag = "CDSChange", 
             tag_name = "CDS",
             displayIndelSize = 1, 
             angle = 45,
             replaceMultiAllele = TRUE,
             ALLELE.color = "grey90")

## ----eval=NOT_CRAN, fig.height=4, fig.width=6, message=FALSE, warning=FALSE, paged.print=FALSE----
# add multi annotation
plotHapTable(hapSummary,
             hapPrefix = "H",
             INFO_tag = c("CDSChange", "PrChange"),
             displayIndelSize = 1, 
             angle = 45,
             replaceMultiAllele = TRUE,
             ALLELE.color = "grey90")

## ----eval=NOT_CRAN, fig.height=4, fig.width=6, message=FALSE, warning=FALSE, paged.print=FALSE----
# add multi annotation
plotHapTable(hapSummary,
             hapPrefix = "H",
             INFO_tag = c("CDSChange", "PrChange"),
             tag_name = c("CDS", "Pr"),
             displayIndelSize = 1, 
             angle = 45,
             replaceMultiAllele = TRUE,
             ALLELE.color = "grey90")

## ----display variations on gene model, eval=NOT_CRAN, fig.height=4, fig.width=6, message=FALSE, warning=FALSE, paged.print=FALSE----
displayVarOnGeneModel(hapSummary, gff,
                      Chr = "scaffold_1",
                      startPOS = 4300, endPOS = 7910,
                      type = "pin", cex = 0.7,
                      CDS_h = 0.05, fiveUTR_h = 0.02, threeUTR_h = 0.01)

## ----hapNet, eval=NOT_CRAN----------------------------------------------------
hapNet <- get_hapNet(hapSummary,
                     AccINFO = AccINFO,
                     groupName = "Type")

## ----eval=NOT_CRAN, fig.height=6, fig.width=7, message=FALSE, warning=FALSE, paged.print=FALSE----
# plot haploNet

plotHapNet(hapNet,
           size = "freq",                   # circle size
           scale = "log2",                 # scale circle with 'log10(size + 1)'
           cex = 0.8,                       # size of hap symbol
           col.link = 2,                    # link colors
           link.width = 2,                  # link widths
           show.mutation = 2,               # mutation types one of c(0,1,2,3)
           legend = c(-12.5, 7))        # legend position

## ----geo distribution, eval=NOT_CRAN, fig.height=4, fig.width=6, message=FALSE, warning=FALSE, paged.print=FALSE----
# library(mapdata)
# library(maptools)
hapDistribution(hapResult,
                AccINFO = AccINFO,
                LON.col = "longitude",
                LAT.col = "latitude", 
                hapNames = c("H001", "H002", "H003"), 
                legend = TRUE)

## ----hapVsPheno merged, eval=NOT_CRAN, fig.height=4, fig.width=10, message=FALSE, warning=FALSE, paged.print=FALSE----
results <-hapVsPheno(hapResult,
                     hapPrefix = "H",
                     title = "This is title",
                     mergeFigs = TRUE,
                     pheno = pheno,
                     phenoName = "GrainWeight.2021",
                     minAcc = 3)
plot(results$figs)

## ----hapVsPheno separated, eval=NOT_CRAN, fig.height=4, fig.width=6, message=FALSE, warning=FALSE, paged.print=FALSE----
results <- hapVsPheno(hap = hapResult,
                      hapPrefix = "H",
                      title = "This is title",
                      pheno = pheno,
                      phenoName = "GrainWeight.2021",
                      minAcc = 3,
                      mergeFigs = FALSE)
plot(results$fig_pvalue)
plot(results$fig_Violin)

## ----eval=FALSE---------------------------------------------------------------
#  hapVsPhenos(hapResult,
#              pheno,
#              outPutSingleFile = TRUE,
#              hapPrefix = "H",
#              title = "Seita.0G000000",
#              file = "mypheno.tiff",
#              width = 12,
#              height = 8,
#              res = 300)
Any scripts or data that you put into this service are public.
geneHapR documentation built on May 29, 2024, 11:59 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
geneHapR
Gene Haplotype Statistics, Phenotype Association and Visualization

inst/doc/Introduction_of_geneHapR.R
In geneHapR: Gene Haplotype Statistics, Phenotype Association and Visualization

Try the geneHapR package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

geneHapR Gene Haplotype Statistics, Phenotype Association and Visualization

inst/doc/Introduction_of_geneHapR.R In geneHapR: Gene Haplotype Statistics, Phenotype Association and Visualization

Try the geneHapR package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

geneHapR
Gene Haplotype Statistics, Phenotype Association and Visualization

inst/doc/Introduction_of_geneHapR.R
In geneHapR: Gene Haplotype Statistics, Phenotype Association and Visualization