# SpecAnnotationMilena

# Setup: chunk options, packages, and the spectrum-level metadata table.
knitr::opts_chunk$set(echo = TRUE)
library(prozor)
library(reshape2)

# Directory holding the input data; edit this one line to relocate the files.
dataDir <- "c:/Users/wewol/Google Drive/Finnland"
specMeta <- read.table(file.path(dataDir, "specMeta.txt"),
                       stringsAsFactors = FALSE)

# Quick look at the size, structure and score distribution of the table.
dim(specMeta)
str(specMeta)
nrow(specMeta)
hist(specMeta$score, breaks = 100)

# Peptide-by-file matrix. No value.var / fun.aggregate is passed, so reshape2
# falls back to its defaults (NOTE(review): presumably a per-cell row count --
# confirm against the message acast prints). Zero cells are turned into NA
# before plotting.
# The matrix is deliberately NOT called `head`, so base::head() stays unmasked.
peptideByFile <- acast(specMeta, peptideSeq ~ fileName)
peptideByFile[peptideByFile == 0] <- NA
image(t(peptideByFile))

dim(peptideByFile)
# Pairwise comparison of the first four files on log-log axes.
quantable::mypairs(peptideByFile[, 1:4], log = "xy")

# Distinct stripped peptide sequences; `upeptide` is what gets annotated
# against the FASTA databases further down.
length(unique(specMeta$peptideSeq))
upeptide <- unique(specMeta$peptideSeq)

# Load the three human UniProt FASTA files that are compared below.
# All of them live in one directory; edit fastaDir to relocate them.
fastaDir <- "c:/Users/wewol/Google Drive/Finnland"

# Entries from the "-UNREVIEWED" download.
resAll <- prozor::readPeptideFasta(
  file.path(fastaDir,
            "uniprot-taxonomy-Homo+sapiens+Human9606-UNREVIEWED.fasta.gz")
)
length(resAll)

# Entries from the download without a suffix (used below as the
# "with isoforms" database).
resIso <- prozor::readPeptideFasta(
  file.path(fastaDir, "uniprot-taxonomy-Homo+sapiens+Human9606.fasta.gz")
)
length(resIso)

# Entries from the "-NOISO" download (used below as the "no isoforms"
# database).
resRev <- prozor::readPeptideFasta(
  file.path(fastaDir, "uniprot-taxonomy-Homo+sapiens+Human9606-NOISO.fasta.gz")
)
length(resRev)


# Overlap of entry names between the databases, plus a peek at the names.
length(intersect(names(resIso), names(resRev)))
intersect(names(resIso), names(resAll))
names(resAll)[1:10]

# Fraction of the query peptides that show up at least once in an annotation
# table returned by prozor::annotatePeptides().
annotatedFraction <- function(annotation, peptides) {
  length(unique(annotation$peptideSeq)) / length(peptides)
}

# resIso and resAll concatenated into a single database.
combinedFasta <- c(resIso, resAll)
annotAll <- prozor::annotatePeptides(upeptide, combinedFasta)
annotatedFraction(annotAll, upeptide)

# resIso only.
annotIso <- prozor::annotatePeptides(upeptide, resIso)
annotatedFraction(annotIso, upeptide)

# resRev only.
annotRev <- prozor::annotatePeptides(upeptide, resRev)
annotatedFraction(annotRev, upeptide)


# For each annotation table: count how many rows each peptide sequence has,
# plot the sorted counts, and report the percentage of peptides that occur
# exactly once. Each table() is computed once and reused.
# The plots are drawn with axes = FALSE and only the y axis is added back.

# resIso + resAll
matchCountsAll <- table(annotAll$peptideSeq)
plot(sort(matchCountsAll), axes = FALSE)
axis(2)
mean(matchCountsAll == 1) * 100
head(annotAll)


# resIso
matchCountsIso <- table(annotIso$peptideSeq)
plot(sort(matchCountsIso), axes = FALSE)
axis(2)

mean(matchCountsIso == 1) * 100

# resRev -- the y axis was missing here; added so all three plots agree.
matchCountsRev <- table(annotRev$peptideSeq)
plot(sort(matchCountsRev), axes = FALSE)
axis(2)
mean(matchCountsRev == 1) * 100

# Do protein inference

library(Matrix)

# One row per distinct combination of modified sequence, charge and stripped
# sequence; peptideSeq is the key used to join the protein annotation.
precursors <- unique(
  specMeta[, c("peptideModSeq", "precursorCharge", "peptideSeq")]
)
dim(precursors)

# Join the precursors with the proteinID column of an annotation table and
# hand the result to prozor::prepareMatrix().
#
# precursors: data frame with a peptideSeq column.
# annotation: result of prozor::annotatePeptides() (needs peptideSeq and
#             proteinID columns).
# Returns whatever prepareMatrix() returns for the merged table.
buildPrecursorProteinMatrix <- function(precursors, annotation) {
  annotatedPrecursors <- merge(
    precursors,
    annotation[, c("peptideSeq", "proteinID")],
    by = "peptideSeq"
  )
  prepareMatrix(annotatedPrecursors)
}

# The same three steps are run for each database: plot the matrix, run
# prozor::greedy() on it, and count the distinct values in the greedy result
# (NOTE(review): presumably the number of proteins retained -- confirm against
# the prozor documentation).

# For TrEMBL and Swiss-Prot
# Annotate the precursors with protein IDs
precursorMatrixAll <- buildPrecursorProteinMatrix(precursors, annotAll)
image(precursorMatrixAll)
greedyAll <- greedy(precursorMatrixAll)
length(unique(unlist(greedyAll)))

# For Swiss-Prot with isoforms
precursorMatrixIso <- buildPrecursorProteinMatrix(precursors, annotIso)
image(precursorMatrixIso)
greedyIso <- greedy(precursorMatrixIso)
length(unique(unlist(greedyIso)))

# For Swiss-Prot without isoforms
precursorMatrixRev <- buildPrecursorProteinMatrix(precursors, annotRev)
image(precursorMatrixRev)
greedyRev <- greedy(precursorMatrixRev)
length(unique(unlist(greedyRev)))


# Try the prozor package in your browser
#
# Any scripts or data that you put into this service are public.
#
# prozor documentation built on Dec. 11, 2021, 9:51 a.m.