# inst/doc/geno2proteoIntroduction.R

### R code from vignette source 'geno2proteoIntroduction.Rnw'

###################################################
### code chunk number 1: options
###################################################
# Set the console output width option to 72 for the rest of the script.
options(width = 72)


###################################################
### code chunk number 2: installPackage (eval = FALSE)
###################################################
## install.packages("geno2proteo")


###################################################
### code chunk number 3: example_generatingCDSaaFile
###################################################
library(geno2proteo)

# Locate the example data files shipped in the package's "extdata" folder.
dataFolder <- system.file("extdata", package = "geno2proteo")
geneticCodeFile <- file.path(
  dataFolder,
  "geneticCode_standardTable_lines.txt"
)
gtfFile_chr16 <- file.path(
  dataFolder,
  "Homo_sapiens.GRCh37.74_chromosome16_35Mlong.gtf.gz"
)
DNAfastaFile_chr16 <- file.path(
  dataFolder,
  "Homo_sapiens.GRCh37.74.dna.chromosome.16.fa_theFirst3p5M.txt.gz"
)

# The session's temporary directory is handed over as the output folder.
tmpFolder <- tempdir()

# Run the generation step on the genetic code table, the GTF annotation and
# the DNA FASTA file, using the "perl" executable found on the search path.
generatingCDSaaFile(
  geneticCodeFile_line = geneticCodeFile,
  gtfFile = gtfFile_chr16,
  DNAfastaFile = DNAfastaFile_chr16,
  outputFolder = tmpFolder,
  perlExec = "perl"
)


###################################################
### code chunk number 4: example_genomicLocsToProteinSequence
###################################################
library(geno2proteo)

dataFolder <- system.file("extdata", package = "geno2proteo")

# The input file holding the genomic loci.
inputFile_loci <- file.path(
  dataFolder,
  "transId_pfamDomainStartEnd_chr16_Zdomains_22examples_genomicPos.txt"
)

# The CDSs data file.
CDSaaFile <- file.path(
  dataFolder,
  "Homo_sapiens.GRCh37.74_chromosome16_35Mlong.gtf.gz_AAseq.txt.gz"
)

# Read the tab-separated input data (first line is the header).
inputLoci <- read.table(
  inputFile_loci,
  sep = "\t",
  stringsAsFactors = FALSE,
  header = TRUE
)

# Only the first 4 columns are used here, although the input data can have
# as many extra columns as you like.
inputLoci <- inputLoci[, 1:4]

proteinSeq <- genomicLocsToProteinSequence(
  inputLoci = inputLoci,
  CDSaaFile = CDSaaFile
)

# Show the 1st and 5th genomic loci in the input ...
inputLoci[c(1, 5), ]

# ... and the DNA and protein sequences of the coding regions within them.
proteinSeq[c(1, 5), ]



###################################################
### code chunk number 5: example_genomicLocsToWholeDNASequence
###################################################
library(geno2proteo)

dataFolder <- system.file("extdata", package = "geno2proteo")

# The genomic loci input file and the DNA FASTA file from the example data.
inputFile_loci <- file.path(
  dataFolder,
  "transId_pfamDomainStartEnd_chr16_Zdomains_22examples_genomicPos.txt"
)
DNAfastaFile <- file.path(
  dataFolder,
  "Homo_sapiens.GRCh37.74.dna.chromosome.16.fa_theFirst3p5M.txt.gz"
)

# Read the tab-separated input data (first line is the header).
inputLoci <- read.table(
  inputFile_loci,
  sep = "\t",
  stringsAsFactors = FALSE,
  header = TRUE
)

# Only the first 4 columns are used here, although the input data can have
# as many extra columns as you like.
inputLoci <- inputLoci[, 1:4]

# The session's temporary directory is handed over as the temp folder.
tmpFolder <- tempdir()

DNASeqNow <- genomicLocsToWholeDNASequence(
  inputLoci = inputLoci,
  DNAfastaFile = DNAfastaFile,
  tempFolder = tmpFolder,
  perlExec = "perl"
)

# The 1st and 5th genomic loci in the input.
inputLoci[c(1, 5), ]

# The whole DNA sequences of the two loci.
DNASeqNow[c(1, 5), ]



###################################################
### code chunk number 6: example_proteinLocsToGenomic
###################################################
library(geno2proteo)

dataFolder <- system.file("extdata", package = "geno2proteo")

# --- Run 1: proteins specified by their ENSEMBL protein ID ----------------
inputFile_loci <- file.path(
  dataFolder,
  "transId_pfamDomainStartEnd_chr16_Zdomains_22examples_proteinID.txt"
)
CDSaaFile <- file.path(
  dataFolder,
  "Homo_sapiens.GRCh37.74_chromosome16_35Mlong.gtf.gz_AAseq.txt.gz"
)

inputLoci <- read.table(
  inputFile_loci,
  sep = "\t",
  stringsAsFactors = FALSE,
  header = TRUE
)

genomicLoci <- proteinLocsToGenomic(
  inputLoci = inputLoci,
  CDSaaFile = CDSaaFile
)

# The 1st and 5th protein regions in the input data.
inputLoci[c(1, 5), ]

# The genomic locations of these protein regions.
genomicLoci[c(1, 5), ]

# --- Run 2: proteins specified by their ENSEMBL transcript ID -------------
# The same CDSs data file is reused; only the input loci file changes.
inputFile_loci <- file.path(
  dataFolder,
  "transId_pfamDomainStartEnd_chr16_Zdomains_22examples.txt"
)
inputLoci <- read.table(
  inputFile_loci,
  sep = "\t",
  stringsAsFactors = FALSE,
  header = TRUE
)
genomicLoci <- proteinLocsToGenomic(
  inputLoci = inputLoci,
  CDSaaFile = CDSaaFile
)

# The 1st and 5th protein regions in the input data.
inputLoci[c(1, 5), ]

# The genomic locations of these protein regions.
genomicLoci[c(1, 5), ]



###################################################
### code chunk number 7: example_proteinLocsToProteinSeq
###################################################
library(geno2proteo)

dataFolder <- system.file("extdata", package = "geno2proteo")

# The protein-region input file and the CDSs data file from the example data.
inputFile_loci <- file.path(
  dataFolder,
  "transId_pfamDomainStartEnd_chr16_Zdomains_22examples_proteinID.txt"
)
CDSaaFile <- file.path(
  dataFolder,
  "Homo_sapiens.GRCh37.74_chromosome16_35Mlong.gtf.gz_AAseq.txt.gz"
)

# Read the tab-separated input data (first line is the header).
inputLoci <- read.table(
  inputFile_loci,
  sep = "\t",
  stringsAsFactors = FALSE,
  header = TRUE
)

ProtSeqNow <- proteinLocsToProteinSeq(
  inputLoci = inputLoci,
  CDSaaFile = CDSaaFile
)

# The 1st and 5th protein regions in the input data.
inputLoci[c(1, 5), ]

# The protein sequences of these protein regions.
ProtSeqNow[c(1, 5), ]

# Try the geno2proteo package in your browser
#
# Any scripts or data that you put into this service are public.
#
# geno2proteo documentation built on June 13, 2022, 5:08 p.m.