require(jspputils)
getwd()

initial QC

filename <- '../combined/txt/modificationSpecificPeptides.txt'
additionalfilter <- "grepl('[pP]hos', `Modifications`)"
myeset <- mqtxt_to_eset(filename, 
                        normalize = FALSE, 
                        impute = FALSE, 
                        transformfun = log2,
                        additional_filter = additionalfilter
                        )

group_missing <- count_missing_by.data.frame(
    Biobase::exprs(myeset), 
    named_character_vector = c('Hemi' = 'Hemi-', 'WT' = 'WT-'))

colnames(group_missing) <- paste0('N.missing.in.', colnames(group_missing))

dim(myeset)
group_missing %>% lapply(table)

missing_in_Hemi <- group_missing$N.missing.in.Hemi > 6 
missing_in_WT <- group_missing$N.missing.in.WT > 6 

missing_in_both <- missing_in_Hemi & missing_in_WT

table(missing_in_both)

myeset <- mqtxt_to_eset(filename, 
                        normalize = FALSE, 
                        impute = TRUE, 
                        transformfun = log2,
                        additional_filter = additionalfilter
                        )
tmpset <- myeset[!missing_in_both,]
limma::plotMDS(tmpset)

# pairs(Biobase::exprs(myeset))
corrplot::corrplot(cor(Biobase::exprs(tmpset)), order = 'hclus')
plot_dists.eset(tmpset)
boxplot(Biobase::exprs(tmpset))
Biobase::exprs(tmpset) %>% 
    reshape2::melt() %>% 
    ggplot(aes(x = Var2, y = value),
           xlab_name = "Intesity",
           ylab_name = "Run") + 
    geom_violin() + 
    geom_jitter(height = 0, alpha =0.01)


foo <- mqtxt_to_eset(
    filename, 
    normalize = FALSE, 
    impute = FALSE, 
    transformfun = log2,
    additional_filter = additionalfilter,
    nor)

foo %>%
    Biobase::exprs() %>% limma::normalizeBetweenArrays(method = "scale") %>%
    reshape2::melt() %>% 
    ggplot(aes(x = Var2, y = value),
           xlab_name = "Intesity",
           ylab_name = "Run") + 
    geom_violin() + 
    geom_jitter(height = 0, alpha =0.01)
myeset <- mqtxt_to_eset(filename, 
                        normalize = FALSE, 
                        impute = FALSE, 
                        transformfun = log2,
                        additional_filter = additionalfilter,
                        normalization_method = 'scale',
                        drop_cols = 'Intensity 40718-Hemi-A|CDKL5-'
                        )

group_missing <- count_missing_by.data.frame(
    Biobase::exprs(myeset), 
    named_character_vector = c('Hemi' = 'Hemi-', 'WT' = 'WT-'))

colnames(group_missing) <- paste0('N.missing.in.', colnames(group_missing))

group_missing %>% lapply(table)

missing_in_Hemi <- group_missing$N.missing.in.Hemi > 6
missing_in_WT <- group_missing$N.missing.in.WT > 6

missing_in_both <- missing_in_Hemi & missing_in_WT

table(missing_in_both)


raw_myeset <- mqtxt_to_eset(filename, 
                        normalize = TRUE, 
                        impute = TRUE, 
                        transformfun = log2,
                        additional_filter = additionalfilter, 
                        normalization_method = 'scale',
                        drop_cols = 'Intensity 40718-Hemi-A|CDKL5-'
                        )


stopifnot(dim(myeset) == dim(raw_myeset))

Biobase::fData(raw_myeset) <- cbind(Biobase::fData(raw_myeset), group_missing)

names_column <- grep(
    "Gene.names|gene names", 
    colnames(Biobase::fData(myeset)),
    ignore.case = TRUE, 
    value = TRUE) %>% make.names()


data_nonrandom_missing_WT <- raw_myeset[ missing_in_WT & !missing_in_Hemi, ]
data_nonrandom_missing_Hemi <- raw_myeset[ missing_in_Hemi & !missing_in_WT, ]
data_random_missing <- raw_myeset[ missing_in_both, ]
data_non_missing <- raw_myeset[!missing_in_WT & !missing_in_Hemi ,]


dim(data_nonrandom_missing_WT)
dim(data_nonrandom_missing_Hemi)
dim(data_random_missing)

myeset <- raw_myeset[!missing_in_both ,]

Processing

#pairs(Biobase::exprs(myeset))
plot_dists.eset(myeset)
limma::plotMDS(myeset, dim.plot = c(1,2), plot = TRUE)
limma::plotMDS(myeset, dim.plot = c(4,3), plot = TRUE)

corrplot::corrplot(cor(Biobase::exprs(myeset)), order = 'hclus')
boxplot(Biobase::exprs(myeset))

Biobase::exprs(myeset) %>% 
    reshape2::melt() %>% 
    ggplot(aes(x = Var2, y = value),
           xlab_name = "Intesity",
           ylab_name = "Run") + 
    geom_violin() + 
    geom_jitter(height = 0, alpha =0.01)

Model Fitting

factor_vector <-  c( 'WT' = 'WT', 'Hemi' = "Hemi" )
batch_vector <- c( '40718' = '40718', '32918' = "32918", '05918' = '50918' )

design <- make_exp_design(myeset, 
                          factor_vector = factor_vector, 
                          reference_factor = "Hemi", 
                          batch_vector = batch_vector)
design

fit <- limma::lmFit(myeset, design)
fit2 <- limma::eBayes(fit)
# limma::topTable(fit2, coef="s_mainfactorWT", confint = TRUE)

dim(fit2)
# extract_fit_table(fit.contr) %>% to_clipboard()

Plotting

Plot data not missing at random

makealltheplots(fit = fit2, coef = 's_mainfactorWT', plotprefix = 'ppeptide_nonMR_both_')

NonMissing data

data_non_missing %>% 
    limma::lmFit(., design) %>%
    limma::eBayes(.) %>%
    makealltheplots(fit = ., coef = 's_mainfactorWT', plotprefix = 'ppeptide_nonMissing_')

NONRANDOM in WT

data_nonrandom_missing_WT %>% 
    limma::lmFit(., design) %>%
    limma::eBayes(.) %>%
    makealltheplots(fit = ., coef = 's_mainfactorWT', plotprefix = 'ppeptide_nonMR_wt_')

NONRANDOM in Hemi

data_nonrandom_missing_Hemi %>% 
    limma::lmFit(., design) %>%
    limma::eBayes(.) %>%
    makealltheplots(fit = ., coef = 's_mainfactorWT', plotprefix = 'ppeptide_nonMR_Hemi_')

Randomly Missing Data

data_random_missing %>% 
    limma::lmFit(., design) %>%
    limma::eBayes(.) %>%
    makealltheplots(fit = ., coef = 's_mainfactorWT', plotprefix = 'ppeptide_MR_data_')

Plots with the Raw data (includes missing data)

raw_myeset %>% 
    limma::lmFit(., design) %>%
    limma::eBayes(.) %>%
    makealltheplots(fit = ., coef = 's_mainfactorWT', plotprefix = 'ppeptide_raw_data_')
paste0(format(Sys.time(), "%Y%m%d_%H%M%S_"), "data_set.csv")
sessionInfo()


jspaezp/jspputils documentation built on May 23, 2019, 2:50 p.m.