library(dplyr) hapinsf <- "~/Dropbox/BayesianDA/FGEM_Data/Huang_Plosgen_haploinsufficiency_score_by_gene.txt" hapdf <- read.table(hapinsf) %>% dplyr::rename(Gene=V1,value=V2) %>% dplyr::mutate(feature="HapInsuff",class="HapInsuff")
We can also use continuous features. As an example, let's use ExAC conservation, and brain gene expression.
library(FGEM) expf <- "~/Dropbox/BayesianDA/FGEM_Data/BrainSpanSexVar.RDS" expmat <- readRDS(expf) expdf <- exp2df(expmat) all_anno_df <- bind_rows(exacdf,hapdf) all_anno_df <-distinct(all_anno_df,feature) %>% mutate(feat_ind=1:n()) %>% inner_join(all_anno_df) max(all_anno_df$feat_ind) saveRDS(all_anno_df,"~/Dropbox/BayesianDA/FGEM_Data/all_annotations.RDS") saveRDS(sub_all_anno_df,"~/Dropbox/BayesianDA/FGEM_Data/sub_annotations.RDS")
library(ggplot2) library(tidyr) library(FGEM) library(dplyr) exac_anno <- distinct(exacdf,feature) %>% mutate(feat_ind=1:n()) %>% inner_join(exacdf) GO_df <- distinct(BPGOdf,feature) %>% mutate(feat_ind=1:n()) %>% inner_join(BPGOdf) exac_feat <- distinct(exacdf,feature) %>% mutate(feat_ind=1:n()) %>% mutate(class="ExAC") GO_feat <- distinct(BPGOdf,feature) %>% mutate(feat_ind=1:n()) %>% mutate(class="GO_BP") afgemres <- dir("~/Dropbox/BayesianDA/FGEM_Data/NFGEM_GO_RDS/",full.names = T) go_res <- bind_rows(lapply(afgemres,readRDS)) %>% distinct(feature,.keep_all=T) %>% inner_join(GO_feat) %>% mutate(qval=p.adjust(pval)) %>% arrange(qval) exac_res <- readRDS("~/Dropbox/BayesianDA/FGEM_Data/Exac_res.RDS") %>% ungroup() %>% inner_join(exac_feat) all_res <- bind_rows(go_res,exac_res) %>% arrange(qval) rep_res <- select(all_res,feature,Beta,Intercept,Chisq,pval,class,qval) sig_res <- filter(rep_res,qval<0.05) sig_rep <- filter(rep_res,qval<0.1) %>% select(feature,Beta,Intercept,class) # boot_pRec <- filter(exac_anno,feature=="pRec") %>% do(FGEM_bootstrap(cfeat_df(.,BFdata,impute=F))) anno_df <- bind_rows(exacdf,BPGOdf) sig_anno <- inner_join(sig_rep,anno_df) nsig_anno <- select(sig_anno,feature,Gene,value) datadf <- BFdata feat_df <- spread(nsig_anno,feature,value,fill = 0) %>% inner_join(select(datadf,Gene,BF)) tfeat_df <- select(feat_df,-starts_with("GO")) %>% select(bp,mu_mis,BF,Gene) my_res <- FGEM_df(tfeat_df) sfinal_model <- gen_model(feat_list = unique(sig_anno$feature),annodf = sig_anno,datadf = datadf,scale = T) sfinal_model <- gen_model(feat_list = unique(sig_anno$feature),annodf = sig_anno,datadf = datadf,scale = F) sfinal_posterior <- posterior_results(sfinal_model,odatadf,anno_df) select(post_results,Gene,new_prior,new_posterior,old_posterior,post_improvement,BF) %>% arrange(desc(post_improvement)) %>% head(10)
ggplot(posterior_results)+geom_point(aes(x=log10(BF),y=post_improvement)) arrange(post_data,desc(post_improvement)) %>% select(Gene,new_prior,new_posterior,old_posterior,post_improvement,BF) %>% tail # ggplot(post_data)+geom_point(aes(x=new_prior,y=new_posterior)) # sig_prior <- group_by(sig_anno,feature) %>% do(gen_prior_df(.,datadf = datadf)) %>% ungroup() # sig_mat <- spread(sig_prior,key=feature,value=prior) %>% filter(complete.cases(.)) %>% dplyr::select(-Gene) %>% data.matrix() # sig_pca <- prcomp(sig_mat,center=T,scale=T) # biplot(sig_pca) # # Exac_prior <- inner_join(feat_class,sig_prior) %>% filter(class=="ExAC") # Exac_mat <- spread(data=Exac_prior,key=feature,value=prior) %>% filter(complete.cases(.)) # Exac_mat <- data.matrix(select(Exac_mat,-class,-Gene)) # Exac_pc <- prcomp(Exac_mat,center=T,scale=T) # biplot(Exac_pc) # # # GO_prior <- inner_join(feat_class,sig_prior) %>% filter(class%in%c("GO_BP")) # GO_mat <- spread(data = GO_prior,key = feature,value = prior) %>% filter(complete.cases(.)) # GOpca <- prcomp(data.matrix(select(GO_mat,-Gene,-class)),center = T,scale. = T) # biplot(GOpca) )
ggplot(post_results)+geom_point(aes(x=log10(BF),y=new_posterior))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.