scripts/prepare_ProMex_UnAttributed.R

# Script for looking at ProMex output
# Metadata for Lewy study added for customization
# And finally make MSnSet object


fn <- "../notinst/extdata/ProMex/UnAttributed/Lewy_Intact_ProMex_result_UnAttributed.txt.xz"
dat <- read.delim(fn, check.names=F, as.is=T)

# need to come up with a feature name convention
# let's just keep it simple XYZ_i, where
# XYZ is part of UniProt entry name XYZ_HUMAN
# i is the consecutive number of the proteoform
# within a given protein

# prot <- sub("_HUMAN","",dat$ProteinName)
# protForm <- c(unique(prot),prot)
# protForm <- make.unique(protForm)
# protForm <- protForm[-seq_along(unique(prot))]
protForm <- sprintf("PF_%05d", seq_len(nrow(dat)))


fcol <- c("MonoMass", "MinNet", "MaxNet")
ecol <- grep("rep",colnames(dat),value=T)
d.feat <- dat[,fcol]
rownames(d.feat) <- protForm
d.exprs <- as.matrix(dat[,ecol])
d.exprs[d.exprs == 0] <- NA
rownames(d.exprs) <- protForm


metaData <- read.delim("../notinst/extdata/lewy_intact_meta.txt", header = T, as.is=T)
rownames(metaData) <- sprintf("rep%s",as.numeric(sub(".*(\\d{2})","\\1",metaData$file)))


library("MSnbase")
m <- MSnSet(exprs = d.exprs, fData = d.feat, pData = metaData)
stopifnot(validObject(m))
sampleNames(m) <- pData(m)$sample.id


# tweak pData
pData(m)$subject.type <- factor(pData(m)$subject.type, 
                                levels=c("control.2", "control.1", "case"))
pData(m)$match.group <- as.factor(sprintf("%02d", pData(m)$match.group))
pData(m)$Projid <- as.character(pData(m)$Projid)
pData(m)$homogenizationBatch <- as.factor(pData(m)$homogenizationBatch)
pData(m)$prepBatch <- as.factor(pData(m)$prepBatch)

# # remove anything that is not present in at least 26 samples
# plot(table(rowSums(exprs(m) > 0))) # 26 is the min
# m <- m[rowSums(exprs(m) > 0) > 25,]
# exprs(m)[exprs(m) == 0] <- NA
# 
# # some data preprocessing
# exprs(m) <- log2(exprs(m))
# exprs(m) <- sweep(exprs(m), 1, rowMeans(exprs(m), na.rm=TRUE)) # zero center proteoforms

# what is the min observation count?
plot(table(rowSums(!is.na(exprs(m)))))

# 
save(m, file="../data/promex_unattributed.RData")
vladpetyuk/LewyBodies.SN.TopDown documentation built on May 3, 2019, 6:15 p.m.