Nothing
## ----include=FALSE------------------------------------------------------------
knitr::opts_chunk$set(
comment = "#>",
collapse = TRUE
)
## -----------------------------------------------------------------------------
library("protr")
# Load FASTA files
# Note that `system.file()` is for accessing example files
# in the protr package. Replace it with your own file path.
extracell <- readFASTA(
system.file(
"protseq/extracell.fasta",
package = "protr"
)
)
mitonchon <- readFASTA(
system.file(
"protseq/mitochondrion.fasta",
package = "protr"
)
)
## ----eval = FALSE-------------------------------------------------------------
# length(extracell)
## ----eval = FALSE-------------------------------------------------------------
# ## [1] 325
## ----eval = FALSE-------------------------------------------------------------
# length(mitonchon)
## ----eval = FALSE-------------------------------------------------------------
# ## [1] 306
## ----eval = FALSE-------------------------------------------------------------
# extracell <- extracell[(sapply(extracell, protcheck))]
# mitonchon <- mitonchon[(sapply(mitonchon, protcheck))]
## ----eval = FALSE-------------------------------------------------------------
# length(extracell)
## ----eval = FALSE-------------------------------------------------------------
# ## [1] 323
## ----eval = FALSE-------------------------------------------------------------
# length(mitonchon)
## ----eval = FALSE-------------------------------------------------------------
# ## [1] 304
## ----eval = FALSE-------------------------------------------------------------
# # Calculate APseAAC descriptors
# x1 <- t(sapply(extracell, extractAPAAC))
# x2 <- t(sapply(mitonchon, extractAPAAC))
# x <- rbind(x1, x2)
#
# # Make class labels
# labels <- as.factor(c(rep(0, length(extracell)), rep(1, length(mitonchon))))
## ----eval = FALSE-------------------------------------------------------------
# set.seed(1001)
#
# # Split training and test set
# tr.idx <- c(
# sample(1:nrow(x1), round(nrow(x1) * 0.75)),
# sample(nrow(x1) + 1:nrow(x2), round(nrow(x2) * 0.75))
# )
# te.idx <- setdiff(1:nrow(x), tr.idx)
#
# x.tr <- x[tr.idx, ]
# x.te <- x[te.idx, ]
# y.tr <- labels[tr.idx]
# y.te <- labels[te.idx]
## ----eval = FALSE-------------------------------------------------------------
# library("randomForest")
# rf.fit <- randomForest(x.tr, y.tr, cv.fold = 5)
# rf.fit
## ----eval = FALSE-------------------------------------------------------------
# ## Call:
# ## randomForest(x = x.tr, y = y.tr, cv.fold = 5)
# ## Type of random forest: classification
# ## Number of trees: 500
# ## No. of variables tried at each split: 8
# ##
# ## OOB estimate of error rate: 25.11%
# ## Confusion matrix:
# ## 0 1 class.error
# ## 0 196 46 0.1900826
# ## 1 72 156 0.3157895
## ----eval = FALSE-------------------------------------------------------------
# # Predict on test set
# rf.pred <- predict(rf.fit, newdata = x.te, type = "prob")[, 1]
#
# # Plot ROC curve
# library("pROC")
# plot.roc(y.te, rf.pred, grid = TRUE, print.auc = TRUE)
## ----eval = FALSE-------------------------------------------------------------
# ## Call:
# ## plot.roc.default(x = y.te, predictor = rf.pred, col = "#0080ff",
# ## grid = TRUE, print.auc = TRUE)
# ##
# ## Data: rf.pred in 81 controls (y.te 0) > 76 cases (y.te 1).
# ## Area under the curve: 0.8697
## -----------------------------------------------------------------------------
knitr::include_graphics("figures/roc.png")
## ----extractAAC---------------------------------------------------------------
library("protr")
x <- readFASTA(system.file(
"protseq/P00750.fasta",
package = "protr"
))[[1]]
extractAAC(x)
## ----extractDC----------------------------------------------------------------
dc <- extractDC(x)
head(dc, n = 30L)
## ----extractTC----------------------------------------------------------------
tc <- extractTC(x)
head(tc, n = 36L)
## -----------------------------------------------------------------------------
knitr::include_graphics("figures/AAindex.png")
## ----extractMoreau1-----------------------------------------------------------
moreau <- extractMoreauBroto(x)
head(moreau, n = 36L)
## ----extractMoreau2-----------------------------------------------------------
# Define 3 custom properties
myprops <- data.frame(
AccNo = c("MyProp1", "MyProp2", "MyProp3"),
A = c(0.62, -0.5, 15), R = c(-2.53, 3, 101),
N = c(-0.78, 0.2, 58), D = c(-0.9, 3, 59),
C = c(0.29, -1, 47), E = c(-0.74, 3, 73),
Q = c(-0.85, 0.2, 72), G = c(0.48, 0, 1),
H = c(-0.4, -0.5, 82), I = c(1.38, -1.8, 57),
L = c(1.06, -1.8, 57), K = c(-1.5, 3, 73),
M = c(0.64, -1.3, 75), F = c(1.19, -2.5, 91),
P = c(0.12, 0, 42), S = c(-0.18, 0.3, 31),
T = c(-0.05, -0.4, 45), W = c(0.81, -3.4, 130),
Y = c(0.26, -2.3, 107), V = c(1.08, -1.5, 43)
)
# Use 4 properties in the AAindex database and 3 customized properties
moreau2 <- extractMoreauBroto(
x,
customprops = myprops,
props = c(
"CIDH920105", "BHAR880101",
"CHAM820101", "CHAM820102",
"MyProp1", "MyProp2", "MyProp3"
)
)
head(moreau2, n = 36L)
## ----extractMoran-------------------------------------------------------------
# Use the 3 custom properties defined before
# and 4 properties in the AAindex database
moran <- extractMoran(
x,
customprops = myprops,
props = c(
"CIDH920105", "BHAR880101",
"CHAM820101", "CHAM820102",
"MyProp1", "MyProp2", "MyProp3"
)
)
head(moran, n = 36L)
## ----extractGeary-------------------------------------------------------------
# Use the 3 custom properties defined before
# and 4 properties in the AAindex database
geary <- extractGeary(
x,
customprops = myprops,
props = c(
"CIDH920105", "BHAR880101",
"CHAM820101", "CHAM820102",
"MyProp1", "MyProp2", "MyProp3"
)
)
head(geary, n = 36L)
## -----------------------------------------------------------------------------
knitr::include_graphics("figures/CTD.png")
## ----extractCTDC--------------------------------------------------------------
extractCTDC(x)
## ----extractCTDT--------------------------------------------------------------
extractCTDT(x)
## ----extractCTDD--------------------------------------------------------------
extractCTDD(x)
## -----------------------------------------------------------------------------
knitr::include_graphics("figures/ctriad.png")
## ----extractCTriad------------------------------------------------------------
ctriad <- extractCTriad(x)
head(ctriad, n = 65L)
## ----extractSOCN--------------------------------------------------------------
extractSOCN(x)
## -----------------------------------------------------------------------------
knitr::include_graphics("figures/QSO.png")
## ----extractQSO---------------------------------------------------------------
extractQSO(x)
## -----------------------------------------------------------------------------
knitr::include_graphics("figures/PAAC.png")
## ----extractPAAC--------------------------------------------------------------
extractPAAC(x)
## -----------------------------------------------------------------------------
knitr::include_graphics("figures/APAAC.png")
## ----extractAPAAC-------------------------------------------------------------
extractAPAAC(x)
## ----extractDescScales--------------------------------------------------------
x <- readFASTA(system.file(
"protseq/P00750.fasta",
package = "protr"
))[[1]]
descscales <- extractDescScales(
x,
propmat = "AATopo",
index = c(37:41, 43:47),
pc = 5, lag = 7, silent = FALSE
)
## ----extractDescScales2-------------------------------------------------------
length(descscales)
head(descscales, 15)
## ----extractBLOSUM------------------------------------------------------------
x <- readFASTA(system.file(
"protseq/P00750.fasta",
package = "protr"
))[[1]]
blosum <- extractBLOSUM(
x,
submat = "AABLOSUM62",
k = 5, lag = 7, scale = TRUE, silent = FALSE
)
## ----extractBLOSUM2-----------------------------------------------------------
length(blosum)
head(blosum, 15)
## ----eval = FALSE-------------------------------------------------------------
# s1 <- readFASTA(system.file("protseq/P00750.fasta", package = "protr"))[[1]]
# s2 <- readFASTA(system.file("protseq/P08218.fasta", package = "protr"))[[1]]
# s3 <- readFASTA(system.file("protseq/P10323.fasta", package = "protr"))[[1]]
# s4 <- readFASTA(system.file("protseq/P20160.fasta", package = "protr"))[[1]]
# s5 <- readFASTA(system.file("protseq/Q9NZP8.fasta", package = "protr"))[[1]]
# plist <- list(s1, s2, s3, s4, s5)
# psimmat <- parSeqSim(plist, cores = 4, type = "local", submat = "BLOSUM62")
# psimmat
## ----eval = FALSE-------------------------------------------------------------
# ## [,1] [,2] [,3] [,4] [,5]
# ## [1,] 1.00000000 0.11825938 0.10236985 0.04921696 0.03943488
# ## [2,] 0.11825938 1.00000000 0.18858241 0.12124217 0.06391103
# ## [3,] 0.10236985 0.18858241 1.00000000 0.05819984 0.06175942
# ## [4,] 0.04921696 0.12124217 0.05819984 1.00000000 0.05714638
# ## [5,] 0.03943488 0.06391103 0.06175942 0.05714638 1.00000000
## ----eval = FALSE-------------------------------------------------------------
# # By GO Terms
# go1 <- c(
# "GO:0005215", "GO:0005488", "GO:0005515",
# "GO:0005625", "GO:0005802", "GO:0005905"
# ) # AP4B1
# go2 <- c(
# "GO:0005515", "GO:0005634", "GO:0005681",
# "GO:0008380", "GO:0031202"
# ) # BCAS2
# go3 <- c(
# "GO:0003735", "GO:0005622", "GO:0005840",
# "GO:0006412"
# ) # PDE4DIP
# golist <- list(go1, go2, go3)
# parGOSim(golist, type = "go", ont = "CC", measure = "Wang")
## ----eval = FALSE-------------------------------------------------------------
# ## [,1] [,2] [,3]
# ## [1,] 1.000 0.344 0.17
# ## [2,] 0.344 1.000 0.24
# ## [3,] 0.170 0.240 1.00
## ----eval = FALSE-------------------------------------------------------------
# # By Entrez gene id
# genelist <- list(c("150", "151", "152", "1814", "1815", "1816"))
# parGOSim(genelist, type = "gene", ont = "BP", measure = "Wang")
## ----eval = FALSE-------------------------------------------------------------
# ## 150 151 152 1814 1815 1816
# ## 150 1.000 0.702 0.725 0.496 0.570 0.455
# ## 151 0.702 1.000 0.980 0.456 0.507 0.551
# ## 152 0.725 0.980 1.000 0.460 0.511 0.538
# ## 1814 0.496 0.456 0.460 1.000 0.687 0.473
# ## 1815 0.570 0.507 0.511 0.687 1.000 0.505
# ## 1816 0.455 0.551 0.538 0.473 0.505 1.000
## ----eval = FALSE-------------------------------------------------------------
# ids <- c("P00750", "P00751", "P00752")
# prots <- getUniProt(ids)
# prots
## ----eval = FALSE-------------------------------------------------------------
# ## [[1]]
# ## [1] "MDAMKRGLCCVLLLCGAVFVSPSQEIHARFRRGARSYQVICRDEKTQMIYQQHQSWLRPVLRSNRVEYCWCN
# ## SGRAQCHSVPVKSCSEPRCFNGGTCQQALYFSDFVCQCPEGFAGKCCEIDTRATCYEDQGISYRGTWSTAESGAECT
# ## NWNSSALAQKPYSGRRPDAIRLGLGNHNYCRNPDRDSKPWCYVFKAGKYSSEFCSTPACSEGNSDCYFGNGSAYRGT
# ## HSLTESGASCLPWNSMILIGKVYTAQNPSAQALGLGKHNYCRNPDGDAKPWCHVLKNRRLTWEYCDVPSCSTCGLRQ
# ## YSQPQFRIKGGLFADIASHPWQAAIFAKHRRSPGERFLCGGILISSCWILSAAHCFQERFPPHHLTVILGRTYRVVP
# ## GEEEQKFEVEKYIVHKEFDDDTYDNDIALLQLKSDSSRCAQESSVVRTVCLPPADLQLPDWTECELSGYGKHEALSP
# ## FYSERLKEAHVRLYPSSRCTSQHLLNRTVTDNMLCAGDTRSGGPQANLHDACQGDSGGPLVCLNDGRMTLVGIISWG
# ## LGCGQKDVPGVYTKVTNYLDWIRDNMRP"
# ##
# ## [[2]]
# ## [1] "MGSNLSPQLCLMPFILGLLSGGVTTTPWSLARPQGSCSLEGVEIKGGSFRLLQEGQALEYVCPSGFYPYPVQ
# ## TRTCRSTGSWSTLKTQDQKTVRKAECRAIHCPRPHDFENGEYWPRSPYYNVSDEISFHCYDGYTLRGSANRTCQVNG
# ## RWSGQTAICDNGAGYCSNPGIPIGTRKVGSQYRLEDSVTYHCSRGLTLRGSQRRTCQEGGSWSGTEPSCQDSFMYDT
# ## PQEVAEAFLSSLTETIEGVDAEDGHGPGEQQKRKIVLDPSGSMNIYLVLDGSDSIGASNFTGAKKCLVNLIEKVASY
# ## GVKPRYGLVTYATYPKIWVKVSEADSSNADWVTKQLNEINYEDHKLKSGTNTKKALQAVYSMMSWPDDVPPEGWNRT
# ## RHVIILMTDGLHNMGGDPITVIDEIRDLLYIGKDRKNPREDYLDVYVFGVGPLVNQVNINALASKKDNEQHVFKVKD
# ## MENLEDVFYQMIDESQSLSLCGMVWEHRKGTDYHKQPWQAKISVIRPSKGHESCMGAVVSEYFVLTAAHCFTVDDKE
# ## HSIKVSVGGEKRDLEIEVVLFHPNYNINGKKEAGIPEFYDYDVALIKLKNKLKYGQTIRPICLPCTEGTTRALRLPP
# ## TTTCQQQKEELLPAQDIKALFVSEEEKKLTRKEVYIKNGDKKGSCERDAQYAPGYDKVKDISEVVTPRFLCTGGVSP
# ## YADPNTCRGDSGGPLIVHKRSRFIQVGVISWGVVDVCKNQKRQKQVPAHARDFHINLFQVLPWLKEKLQDEDLGFL"
# ##
# ## [[3]]
# ## [1] "APPIQSRIIGGRECEKNSHPWQVAIYHYSSFQCGGVLVNPKWVLTAAHCKNDNYEVWLGRHNLFENENTAQF
# ## FGVTADFPHPGFNLSLLKXHTKADGKDYSHDLMLLRLQSPAKITDAVKVLELPTQEPELGSTCEASGWGSIEPGPDB
# ## FEFPDEIQCVQLTLLQNTFCABAHPBKVTESMLCAGYLPGGKDTCMGDSGGPLICNGMWQGITSWGHTPCGSANKPS
# ## IYTKLIFYLDWINDTITENP"
## ----protcheck----------------------------------------------------------------
x <- readFASTA(system.file("protseq/P00750.fasta", package = "protr"))[[1]]
# A real sequence
protcheck(x)
# An artificial sequence
protcheck(paste(x, "Z", sep = ""))
## ----protseg------------------------------------------------------------------
protseg(x, aa = "M", k = 5)
## -----------------------------------------------------------------------------
knitr::include_graphics("figures/protrweb.png")
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.