analyses/DGN_heritability.R

library(gdsfmt)
library(SNPRelate)
library(RColumbo)
library(dplyr)
library(readr)
library(tidyr)





dgn_gdsnf <- "/media/nwknoblauch/Data/DGN/Lev/DGN.gds"
dgn_h5 <- "/media/nwknoblauch/Data/DGN/Lev/eQTL/DGN_ortho.h5"
dgn_bed <- "/media/nwknoblauch/Data/DGN/Lev/DGN"
#gtex_h5 <- "/home/nwknoblauch/Desktop/eQTL/Snake/Whole_Blood_eQTL_v6p_raw_data.h5"
#gtex_bed <- "/media/nwknoblauch/Data/GTEx/Whole_Blood_v6p_bed"
dgn_famf <- "/media/nwknoblauch/Data/DGN/Lev/DGN.fam"
expf <- "/media/nwknoblauch/Data/DGN/Lev/data_used_for_eqtl_study/cis_data.txt"
#gtex_famf <- "/media/nwknoblauch/Data/GTEx/Whole_Blood_v6p_bed.fam"
expdir <- "/home/nwknoblauch/Desktop/eQTL/Heritability/expdir/"
dgn_gds <- snpgdsOpen(dgn_gdsnf)
all_sample_ids <- data_frame(whole_id=read.gdsn(index.gdsn(dgn_gds,"sample.id")))
all_sample_ids <- separate(all_sample_ids,col = whole_id,into = c("first","second","third","fourth","fifth"),remove = F)


ex_ids <- read_delim(expf,delim = "\t",col_names="ID")

ex_ids <- filter(ex_ids,!is.na(ID))
sub_sample_ids <- semi_join(all_sample_ids,ex_ids,by=c("fourth"="ID"))
#gtexgds <- snpgdsOpen(gtex_gdsnf)
snpgdsClose(dgn_gds)
snpgdsGDS2BED(dgn_gdsnf,dgn_bed,sample.id = sub_sample_ids$whole_id,)

nexp <- get_rownum_h5(dgn_h5,"EXPdata","expression")
expmat <- read_fmat_h5(hap_h5file = dgn_h5,groupname = "EXPdata",dataname = "expression",offset = 0,chunksize = nexp)
sexpmat <- scale(expmat,center = T,scale = T)
expleg <- read_h5_df(dgn_h5,"EXPinfo")

expped <- read.table(dgn_famf,header=F,sep="\t")
colnames(expped) <-c("Family","Individual","Paternal","Maternal","Sex",
                     "Expression")
i <- 1

grmc <- paste0("/home/nwknoblauch/Downloads/gcta/gcta64 --bfile ",dgn_bed,
               " --maf 0.01 --make-grm --out test --thread-num 10")


setwd("/media/nwknoblauch/Data/DGN/Lev")
system(grmc)

fdfl <- list()
ndfl <- list()
expf <- paste0(expdir,"fgid_",expleg$fgeneid[i],".pheno")
for(i in i:nexp){
  cat(i," of ",nexp,"\n")

  expped <- mutate(expped,Expression=sexpmat[,i]) %>% select(Family,Individual,Expression)
  write.table(expped,file = expf,sep="\t",col.names = F,row.names = F,
              quote=F)
  gcta_command <- paste0("/home/nwknoblauch/Downloads/gcta/gcta64 --grm ",
                         "test --pheno ",expf," --reml --out ptest --thread-num 10 --reml-maxit 200")
  system(gcta_command,ignore.stdout = T,ignore.stderr = T)
  if(file.exists("ptest.hsq")){
    fdf <-read.table("ptest.hsq",sep="\t",nrows = 4,header=T)
    fdfl[[i]] <- fdf
    ndf <- read.table("ptest.hsq",sep="\t",skip = 5,header=F)
    ndfl[[i]] <- ndf
    file.remove(expf)
    file.remove("ptest.hsq")
  }else{
    cat("ERROR!!!\n")
    fdfl[[i]] <- NULL
    ndfl[[i]] <- NULL
    file.remove(expf)
  }
}

nfdfl <- mapply(function(df,gid){
  if(!is.null(df)){
    return(mutate(df,fgeneid=gid))
  }else{
    return(NULL)
  }
},fdfl,expleg$fgeneid,SIMPLIFY = F)

nndfl <- mapply(function(df,gid){
  if(!is.null(df)){
    return(mutate(df,fgeneid=gid))
  }else{
    return(NULL)
  }
},ndfl,expleg$fgeneid,SIMPLIFY = F)

vpdf <- bind_rows(nfdfl)
vpdf <- filter(vpdf,Source=="V(G)/Vp") %>% rename(h=Variance) %>% select(-Source)
sdf <- bind_rows(nndfl)

saveRDS(sdf,"/media/nwknoblauch/Data/DGN/DGN_ortho_summary.RDS")
saveRDS(vpdf,"/media/nwknoblauch/Data/DGN/DGN_ortho_estimates.RDS")

library(ggplot2)
her <- vpdf
dgn_her <- rename(her,DGN_h=h,DGN_SE=SE)
filter(her) %>% ggplot()+geom_histogram(aes(x=h),bins=100)+ggtitle("Heritability of all (orthogonalized wrt covariates) genes \n(GCTA trans+cis)")
filter(her,h>1e-06) %>% ggplot()+geom_histogram(aes(x=h),bins=100)+ggtitle("Heritability of heritable genes\n(orthogonalized wrt covariates) \n(h>1e-6) \n(GCTA trans+cis)")


ovpdf <- vpdf
osdf <- sdf

vpdf <- readRDS("/media/nwknoblauch/Data/GTEx/GCTA_trans_ortho_estimates.RDS")
sdf <- readRDS("/media/nwknoblauch/Data/GTEx/GCTA_trans_summary.RDS")

gtex_her <- filter(vpdf,Source=="V(G)/Vp") %>% rename(GTEx_h=Variance,GTEx_SE=SE) %>% select(-Source)

nher<- inner_join(gtex_her,dgn_her,by="fgeneid")


ggplot(nher)+geom_point(aes(x=DGN_h,y=GTEx_h))
filter(nher,GTEx_h>1e-6,DGN_h>1e-6)%>% ggplot(aes(x=DGN_h,y=GTEx_h))+geom_point()+geom_smooth(method="lm")
ggplot(nher)+geom_point(aes(x=ortho_h,y=no_ortho_h))+ggtitle("Heritability of raw expression \nvs\n heritability of expression orthogonalized to covariates")
CreRecombinase/RColumbo documentation built on May 6, 2019, 12:52 p.m.