R/posthoc_or.R

Defines functions fit_stratified_or_q4 fit_stratified_or Model.all.new.dosage rnd2 format_res ptab ORtab Model.all.new

#=============================================================================#
# stratified odds ratios from yi's code
#=============================================================================#


#' Model.all.new
#'
#' create stratified odds ratios. Written by yi lin
#'
#' @param data dataset
#' @param mod string of model to be fitted, exclude dependent var e.g. "age_ref_imp+sex+study_gxe+pc1+pc2+pc3"
#' @param env string of exposure variable e.g. 'asp_ref'
#'
#' @return a list containing model estimates and standard errors for stratified table cells
#' @export
#'
#' @examples Model.all.new(figi, "age_ref_imp+sex+study_gxe+pc1+pc2+pc3", "asp_ref")
Model.all.new <- function(data,mod,env){
  if(length(table(data$p1))<=1 | length(table(data$p2))<=1 | 
     var(2*data$p2+data$p1,na.rm=T)==0 | 
     sum((data$p2+data$p1)>1.1,na.rm=T)>0) {
    res = NA
  } else {
    data[,env]=factor(data[,env])
    mModel <- paste0('outcome~',env,'*p1+',env,'*p2+',mod)
    tmp <- summary(glm(mModel, family='binomial', data=data))
    COV <- tmp$cov.unscaled
    env.idx = env
    if(is.factor(data[,env])) env.idx = paste0(env,levels(data[,env])[-1])
    
    # stratified by GE  
    eg0 <- tmp$coef[env.idx,c(1,2)]
    beta.eg1 <- tmp$coef[env.idx,1]+tmp$coef['p1',1]+tmp$coef[paste0(env.idx,':p1'),1]
    beta.eg2 <- tmp$coef[env.idx,1]+tmp$coef['p2',1]+tmp$coef[paste0(env.idx,':p2'),1]
    e0g <- tmp$coef[c('p1','p2'),c(1,2)]
    I3 =  rep(1,3)
    se.eg1 = se.eg2 = NULL
    for(i in 1:(nlevels(data[,env])-1))
    {
      covs = COV[c(env.idx[i],'p1',paste0(env.idx[i],':p1')),c(env.idx[i],'p1',paste0(env.idx[i],':p1'))]		
      se.eg1 = c(se.eg1,sqrt(t(I3)%*%covs%*%I3))
      covs = COV[c(env.idx[i],'p2',paste0(env.idx[i],':p2')),c(env.idx[i],'p2',paste0(env.idx[i],':p2'))]		
      se.eg2 = c(se.eg2,sqrt(t(I3)%*%covs%*%I3))
    }
    GE<- c(eg0,e0g,beta.eg1,se.eg1,beta.eg2,se.eg2)
    
    # stratified by G 
    g0 <-tmp$coef[env.idx,1:2]
    if(length(env.idx)==1){
      idx.g1 = c(env.idx,paste0(env.idx,':p1'))
      idx.g2 = c(env.idx,paste0(env.idx,':p2'))
      g1 = sum(tmp$coef[idx.g1,1],na.rm=T)
      g2 = sum(tmp$coef[idx.g2,1],na.rm=T)
      Is1 =  rep(1,length(idx.g1))
      Is2 =  rep(1,length(idx.g2))
    }else{
      est.g1 <- data.frame(tmp$coef[env.idx,1],tmp$coef[paste0(env.idx,':p1'),1])
      est.g2 <- data.frame(tmp$coef[env.idx,1],tmp$coef[paste0(env.idx,':p2'),1])
      g1 = rowSums(est.g1,na.rm=T)
      g2 = rowSums(est.g2,na.rm=T)
      Is1 =  rep(1,ncol(est.g1))
      Is2 =  rep(1,ncol(est.g2))
    }
    
    se.g1 = se.g2 = NULL
    for(i in 1:(nlevels(data[,env])-1))
    {
      covs1 = COV[c(env.idx[i],paste0(env.idx[i],':p1')),c(env.idx[i],paste0(env.idx[i],':p1'))]  
      covs2 = COV[c(env.idx[i],paste0(env.idx[i],':p2')),c(env.idx[i],paste0(env.idx[i],':p2'))]  
      se.g1 <- c(se.g1,sqrt(t(Is1)%*%covs1%*%Is1))
      se.g2 <- c(se.g2,sqrt(t(Is2)%*%covs2%*%Is2))
    }
    
    G<- c(g0,g1,se.g1,g2,se.g2)
    
    # stratified by E
    
    e0 <-tmp$coef[c('p1','p2'),1:2]
    if(length(env.idx)==1){
      idx.e11 = c('p1',paste0(env.idx,':p1'))
      idx.e12 = c('p2',paste0(env.idx,':p2'))
      e11 = sum(tmp$coef[idx.e11,1],na.rm=T)
      e12 = sum(tmp$coef[idx.e12,1],na.rm=T)
      Is1 =  rep(1,length(idx.e11))
      Is2 =  rep(1,length(idx.e12))
    }else{
      est.e11 <- data.frame(tmp$coef['p1',1],tmp$coef[paste0(env.idx,':p1'),1])
      est.e12 <- data.frame(tmp$coef['p2',1],tmp$coef[paste0(env.idx,':p2'),1])
      e11 = rowSums(est.e11,na.rm=T)
      e12 = rowSums(est.e12,na.rm=T)
      Is1 =  rep(1,ncol(est.e11))
      Is2 =  rep(1,ncol(est.e12))
    }
    
    se.e11 = se.e12 = NULL
    for(i in 1:(nlevels(data[,env])-1))
    {
      covs1 = COV[c('p1',paste0(env.idx[i],':p1')),c('p1',paste0(env.idx[i],':p1'))]  
      covs2 = COV[c('p2',paste0(env.idx[i],':p2')),c('p2',paste0(env.idx[i],':p2'))]  
      se.e11 <- c(se.e11,sqrt(t(Is1)%*%covs1%*%Is1))
      se.e12 <- c(se.e12,sqrt(t(Is2)%*%covs2%*%Is2))
    }
    E <- c(e0[1,1],e11,e0[1,2],se.e11,e0[2,1],e12,e0[2,2],se.e12)
  }
  res<-list(GE=GE,G=G,E=E)
}



#' ORtab
#' 
#' clean up outputs from Model.all.new
#'
#' @param x 
#' @param elvl 
#' @param glvl 
#' @param res 
#'
#' @return
#' @export
#'
ORtab = function(x,elvl,glvl,res) {
  ORs = data.frame(matrix(NA,nrow=length(elvl),ncol=length(glvl)))
  colnames(ORs) = paste0('p',glvl)
  rownames(ORs) = paste0('OR',elvl)
  ORs[1,1] = 1
  for(c in colnames(ORs))
    for(r in rownames(ORs))
      if(!(c=='p0' & r==paste0('OR',elvl[1]))) 
        ORs[r,c] =  as.character(res[res$snp %in% x,paste0(r,c)])
  ORs      
} 


#' ptab
#' 
#' clean up output from Model.all.new
#'
#' @param x 
#' @param elvl 
#' @param glvl 
#' @param res 
#'
#' @return
#' @export
#'
ptab = function(x,elvl,glvl,res) {
  pval = data.frame(matrix(NA,nrow=length(elvl),ncol=length(glvl)))
  colnames(pval) = paste0('p',glvl)
  rownames(pval) = elvl
  pval[1,1] = 1
  for(c in colnames(pval))
    for(r in rownames(pval))
      if(!(c=='p0' & r==elvl[1])) 
        pval[r,c] =  as.numeric(as.vector(res[res$snp %in% x,paste0('pval',r,c)]))
  pval = apply(pval,2,function(y) paste0('P= ',formatC(y,format='g',digit=2)))    
}


#' format_res
#' 
#' clean up output from Model.all.new
#'
#' @param res 
#'
#' @return
#' @export
#'
format_res <- function(res) {
  betas = colnames(res)[grep('beta',colnames(res),fixed=T)]
  ses = colnames(res)[grep('se',colnames(res),fixed=T)]
  ORs = sapply(seq(length(betas)),function(x,betas,ses,res) 
  {
    or = paste0(rnd2(exp(res[,betas[x]])),' (',
                rnd2(exp(res[,betas[x]]-qnorm(0.975)*res[,ses[x]])),'-',
                rnd2(exp(res[,betas[x]]+qnorm(0.975)*res[,ses[x]])),')')
  },betas=betas,ses=ses,res=res)
  if(nrow(res)==1)  ORs = data.frame(t(ORs)) 
  colnames(ORs) = sub('beta','OR',betas)
  pvals = sapply(seq(length(betas)),function(x,betas,ses,res) 
  {pval = 2*pnorm(-abs(res[,betas[x]]/res[,ses[x]]))},betas=betas,ses=ses,res=res)
  if(nrow(res)==1) pvals = data.frame(t(pvals)) 
  colnames(pvals) = sub('beta','pval',betas)
  pvals.p = apply(pvals,2, function(y) paste0('P= ',formatC(y,format='g',digit=2)))
  if(nrow(res)==1) pvals.p = data.frame(t(pvals.p)) 
  colnames(pvals.p) = paste0('P',colnames(pvals.p))
  res = data.frame(res,ORs,pvals,pvals.p,stringsAsFactors=F)
}


#' rnd2
#' 
#' rounds numbers
#'
#' @param x 
#'
#' @return
#' @export
#'
rnd2 <- function(x) {
  round(x, 2)
}










#' Model.all.new.dosage
#' 
#' similar to original function, but creating outputs for dosage odds ratios. this is so you can create stratified odds ratios but modeling E specific dosage associations (which is modeled continuously, meaning it's a per allele Odds Ratios)
#'
#' @param data 
#' @param mod 
#' @param env 
#'
#' @return
#' @export
#'
Model.all.new.dosage <- function(data, mod, env) {
  if(length(table(data$p1))<=1 | length(table(data$p2))<=1 | var(2*data$p2+data$p1,na.rm=T)==0 | sum((data$p2+data$p1)>1.1,na.rm=T)>0) {
    res = NA
  } else {
    data[,env]=factor(data[,env])
    mModel <- paste0('outcome~',env,'*dosage+',mod)
    tmp <- summary(glm(mModel, family='binomial', data=data))
    COV <- tmp$cov.unscaled
    env.idx = env
    if(is.factor(data[,env])) env.idx = paste0(env,levels(data[,env])[-1])
    
    # stratified by GE  
    eg0 <- tmp$coef[env.idx,c(1,2)]
    beta.eg1 <- tmp$coef[env.idx,1]+tmp$coef['dosage',1]+tmp$coef[paste0(env.idx,':dosage'),1]
    # beta.eg2 <- tmp$coef[env.idx,1]+tmp$coef['p2',1]+tmp$coef[paste0(env.idx,':p2'),1]
    e0g <- tmp$coef[c('dosage'),c(1,2)]
    I3 =  rep(1,3)
    se.eg1 = se.eg2 = NULL
    for(i in 1:(nlevels(data[,env])-1)) {
      covs = COV[c(env.idx[i],'dosage',paste0(env.idx[i],':dosage')), c(env.idx[i],'dosage',paste0(env.idx[i],':dosage'))]		
      se.eg1 = c(se.eg1,sqrt(t(I3)%*%covs%*%I3))
      # covs = COV[c(env.idx[i],'p2',paste0(env.idx[i],':p2')),c(env.idx[i],'p2',paste0(env.idx[i],':p2'))]		
      # se.eg2 = c(se.eg2,sqrt(t(I3)%*%covs%*%I3))
    }
    
    GE <- c(eg0,e0g,beta.eg1,se.eg1)
    
    # stratified by G 
    g0 <-tmp$coef[env.idx,1:2]
    if(length(env.idx)==1) {
      idx.g1 = c(env.idx,paste0(env.idx,':dosage'))
      g1 = sum(tmp$coef[idx.g1,1],na.rm=T)
      Is1 =  rep(1,length(idx.g1))
    } else {
      est.g1 <- data.frame(tmp$coef[env.idx,1],tmp$coef[paste0(env.idx,':p1'),1])
      est.g2 <- data.frame(tmp$coef[env.idx,1],tmp$coef[paste0(env.idx,':p2'),1])
      g1 = rowSums(est.g1,na.rm=T)
      g2 = rowSums(est.g2,na.rm=T)
      Is1 =  rep(1,ncol(est.g1))
      Is2 =  rep(1,ncol(est.g2))
    }
    
    se.g1 = se.g2 = NULL
    for(i in 1:(nlevels(data[,env])-1)) {
      covs1 = COV[c(env.idx[i],paste0(env.idx[i],':dosage')),c(env.idx[i],paste0(env.idx[i],':dosage'))]  
      # covs2 = COV[c(env.idx[i],paste0(env.idx[i],':p2')),c(env.idx[i],paste0(env.idx[i],':p2'))]  
      se.g1 <- c(se.g1,sqrt(t(Is1)%*%covs1%*%Is1))
      # se.g2 <- c(se.g2,sqrt(t(Is2)%*%covs2%*%Is2))
    }
    
    G <- c(g0,g1,se.g1)
    
    # stratified by E
    
    e0 <-tmp$coef[c('dosage'),1:2]
    if(length(env.idx)==1) {
      idx.e11 = c('dosage',paste0(env.idx,':dosage'))
      # idx.e12 = c('p2',paste0(env.idx,':p2'))
      e11 = sum(tmp$coef[idx.e11,1],na.rm=T)
      # e12 = sum(tmp$coef[idx.e12,1],na.rm=T)
      Is1 =  rep(1,length(idx.e11))
      # Is2 =  rep(1,length(idx.e12))
    } else {
      est.e11 <- data.frame(tmp$coef['p1',1],tmp$coef[paste0(env.idx,':p1'),1])
      est.e12 <- data.frame(tmp$coef['p2',1],tmp$coef[paste0(env.idx,':p2'),1])
      e11 = rowSums(est.e11,na.rm=T)
      e12 = rowSums(est.e12,na.rm=T)
      Is1 =  rep(1,ncol(est.e11))
      Is2 =  rep(1,ncol(est.e12))
    }
    
    se.e11 = se.e12 = NULL
    for(i in 1:(nlevels(data[,env])-1)) {
      covs1 = COV[c('dosage',paste0(env.idx[i],':dosage')),c('dosage',paste0(env.idx[i],':dosage'))]  
      # covs2 = COV[c('p2',paste0(env.idx[i],':p2')),c('p2',paste0(env.idx[i],':p2'))]  
      se.e11 <- c(se.e11,sqrt(t(Is1)%*%covs1%*%Is1))
      # se.e12 <- c(se.e12,sqrt(t(Is2)%*%covs2%*%Is2))
    }
    E <- c(e0[1],e11,e0[2],se.e11)
  }
  res<-list(GE=GE,G=G,E=E)
}




#-----------------------------------------------------------------------------#
# wrapper functions to generate two types of stratified odds ratios tables
# 1) original that Yi wrote
# 2) similar to 1, but includes dosage per allele effect in the G column
#-----------------------------------------------------------------------------#


#' fit_stratified_or
#' 
#' create stratified odds ratios table (Yi Lin)
#'
#' @param ds dataset
#' @param exposure string; exposure variable
#' @param snp string; snp name (chr1_bp_ref_alt)
#' @param hrc_version string; e.g. v2.4
#' @param covariates vector; adjustment covarites
#' @param mod string; model of exposure + covariates only. can specify study_gxe interactions if needed (that's how Yi's original code modeled these interaction models)
#' @param dosage whether you want to report dosage or genotype probabilities
#' @param output_dir string output directory
#'
#' @return a matrix of odds ratios, p-values, and strata counts
#' @export
#'
#' @examples fit_stratified_or(figi, 'asp_ref', 'chr5_1234_A_C', 'v2.4', c("age_ref_imp", "sex"), "age_ref_imp+sex")
fit_stratified_or <- function(ds, exposure, snp, hrc_version, covariates, mod, dosage = F, output_dir) {
  
  # output directory
  # output_dir <- paste0("/media/work/gwis/posthoc/", exposure, "/")
  
  # SNP information
  # snp_info <- unlist(strsplit(snp, split = "_"))
  # 
  # # check allele frequency
  # total_dosage <- sum(ds[,snp])
  # total_alleles <- nrow(ds) * 2
  # aaf <- total_dosage / total_alleles
  # 
  # # ---- recode SNPs so that major allele is the reference group
  # if(aaf >= 0.5) {
  #   # flip dosages
  #   ds[, snp] <- abs(ds[, paste0(snp)] - 2)
  #   ds[, paste0(snp, '_dose')] <- abs(ds[, paste0(snp, '_dose')] - 2)
  #   # flip genotype probabilities
  #   pp <- ds[,paste0(snp, "_p2")]
  #   ds[,paste0(snp, "_p2")] <- ds[, paste0(snp, "_p0")]
  #   ds[,paste0(snp, "_p0")] <- pp
  #   # assign ref/alt allele
  #   ref_allele <- snp_info[4]
  #   alt_allele <- snp_info[3]
  # } else {
  #   ref_allele <- snp_info[3]
  #   alt_allele <- snp_info[4]
  # }

  # SNP information
  snp_info <- unlist(strsplit(snp, split = "_"))

  model_check <- glm(glue("outcome ~ {exposure}*{snp} + {glue_collapse(covariates, sep = '+')}"), family = 'binomial', data = ds)

  # ---- recode SNPs so that lower risk allele is reference (to match RERI output)
  if (model_check[[1]][snp] < 0) {
    # flip dosages
    ds[, snp] <- abs(ds[, paste0(snp)] - 2)
    ds[, paste0(snp, '_dose')] <- abs(ds[, paste0(snp, '_dose')] - 2)
    # flip genotype probabilities
    pp <- ds[,paste0(snp, "_p2")]
    ds[,paste0(snp, "_p2")] <- ds[, paste0(snp, "_p0")]
    ds[,paste0(snp, "_p0")] <- pp
    # assign ref/alt allele
    ref_allele <- snp_info[4]
    alt_allele <- snp_info[3]
  } else {
    ref_allele <- snp_info[3]
    alt_allele <- snp_info[4]
  }
  
  
  # create data subset
  tmp1 = ds[, c('outcome', exposure, covariates)]
  # tmp2 = ds[, grepl(snp, names(ds))]
  tmp2 = ds[, grepl(paste(paste0(snp, c("_dose", "_p0", "_p1", "_p2")), collapse = "|"), names(ds))]
  names(tmp2) <- c("dosage", "p0", "p1", "p2")
  
  ds_tmp = cbind(tmp1, tmp2) %>% 
    na.omit(.[, c(exposure,'outcome', covariates, 'p1','p2','dosage')])
  
  res.pool.un = res.pool.g = res.pool.e = NULL
  Ncaco = data.frame(snps=snp,matrix(NA,ncol=6*2))
  colnames(Ncaco) = c('snps',paste0('Co',1:2),paste0('Ca',1:2),
                      paste0('Co1',1:2),paste0('Ca1',1:2),
                      paste0('Co2',1:2),paste0('Ca2',1:2))
  rownames(Ncaco) = snp
  
  
  #---- Calculate counts for each cell ----------------
  Ncaco[snp,c(paste0('Co',1:2),paste0('Ca',1:2))] = t(table(ds_tmp[,c('outcome',exposure)]))
  Ncaco[snp,c(paste0('Co1',1:2),paste0('Ca1',1:2))] = c(t(tapply(ds_tmp$p1, ds_tmp[,c('outcome',exposure)],sum,na.rm=T)))
  Ncaco[snp,c(paste0('Co2',1:2),paste0('Ca2',1:2))] = c(t(tapply(ds_tmp$p2, ds_tmp[,c('outcome',exposure)],sum,na.rm=T)))
  
  #-- Fit unrestricted model --------
  ds_tmp[,exposure] = factor(ds_tmp[,exposure])
  tmp = as.vector(Model.all.new(ds_tmp,mod,exposure))
  res.pool.un = rbind(res.pool.un,data.frame(snp,exposure,t(tmp$GE)))
  res.pool.e = rbind(res.pool.e,data.frame(snp,exposure,t(tmp$E)))
  res.pool.g = rbind(res.pool.g,data.frame(snp,exposure,t(tmp$G)))
  
  ## organize results ######
  elvl=c(0:1) ; glvl = c(0,1,2)
  
  colnames(res.pool.un) = c('snp','env',paste0('beta',elvl[-1],'p0'),paste0('se',elvl[-1],'p0'),
                            'beta0p1','beta0p2','se0p1','se0p2',
                            paste0('beta',elvl[-1],'p1'),paste0('se',elvl[-1],'p1'),
                            paste0('beta',elvl[-1],'p2'),paste0('se',elvl[-1],'p2'))
  res.pool.un = format_res(res.pool.un)
  
  
  ##== stratified by G results #######
  colnames(res.pool.g) = c('snp','env',paste0(rep(c('beta0','se0','beta1','se1','beta2','se2'),each=1),
                                              rep(elvl[-1],6)))
  res.pool.g = format_res(res.pool.g)
  
  ##== stratified by E results #######
  colnames(res.pool.e) = c('snp','env',paste0('beta1',elvl),paste0('se1',elvl),
                           paste0('beta2',elvl),paste0('se2',elvl))
  res.pool.e = format_res(res.pool.e)
  
  ##== Put into table 
  OR.tab = ORtab(snp,elvl=elvl,glvl=glvl,res=res.pool.un)
  pval.tab = ptab(snp,elvl=elvl,glvl=glvl,res=res.pool.un)
  
  ORg.tab = matrix(as.character(unlist(c(as.character(res.pool.g[,paste0('OR0',elvl[-1])]),
                                         as.character(res.pool.g[,paste0('OR1',elvl[-1])]),
                                         as.character(res.pool.g[,paste0('OR2',elvl[-1])])))), ncol=3)
  pg.tab = matrix(as.character(unlist(c(as.character(res.pool.g[,paste0('Ppval0',elvl[-1])]),
                                        as.character(res.pool.g[,paste0('Ppval1',elvl[-1])]),
                                        as.character(res.pool.g[,paste0('Ppval2',elvl[-1])])))),ncol=3)
  
  colnames(ORg.tab) = colnames(pg.tab) = c(paste0('p',glvl))
  ORe.tab = matrix(as.character(unlist(c(res.pool.e[,paste0('OR1',elvl)],
                                         res.pool.e[,paste0('OR2',elvl)]))),ncol=2)
  pe.tab  = matrix(as.character(unlist(c(res.pool.e[,paste0('Ppval1',elvl)],
                                         res.pool.e[,paste0('Ppval2',elvl)]))),ncol=2,byrow=T)
  
  #== calculate counts for G=0 and put counts into format ca/co
  for(i in 1:2){
    eval(parse(text=paste0('Ncaco$caco0',i,"=paste0(round(Ncaco$Ca",i,'-Ncaco$Ca1',i,'-Ncaco$Ca2',i,
                           ",1),'/',round(Ncaco$Co",i,'-Ncaco$Co1',i,'-Ncaco$Co2',i,',1))')))
    eval(parse(text=paste0('Ncaco$caco1',i,'=paste0(round(Ncaco$Ca1',i,",1),'/',round(Ncaco$Co1",i,",1))")))
    eval(parse(text=paste0('Ncaco$caco2',i,'=paste0(round(Ncaco$Ca2',i,",1),'/',round(Ncaco$Co2",i,",1))")))
  }
  
  #=== write into table
  est = cbind(rbind(OR.tab[1,],pval.tab[1,],OR.tab[2,],pval.tab[2,],
                    ORg.tab[1,],pg.tab[1,]),
              rbind(ORe.tab[1,],pe.tab[1,],ORe.tab[2,],pe.tab[2,],rep(NA,2),rep(NA,2)),
              N0 = c(Ncaco[snp,'caco01'],NA,Ncaco[snp,'caco02'],rep(NA,3)),
              N1 = c(Ncaco[snp,'caco11'],NA,Ncaco[snp,'caco12'],rep(NA,3)),
              N2 = c(Ncaco[snp,'caco21'],NA,Ncaco[snp,'caco22'],rep(NA,3)))
  
  # slightly modify output in case anyone wants per allele effects rather than p1/p2
  if(dosage == T) {
    
    tmp = as.vector(Model.all.new.dosage(ds_tmp,mod,exposure))
    
    res.pool.un = res.pool.g = res.pool.e = NULL
    res.pool.un = rbind(res.pool.un,data.frame(snp,exposure,t(tmp$GE)))
    # res.pool.e = rbind(res.pool.e,data.frame(snp,exposure,t(tmp$E)))
    # res.pool.g = rbind(res.pool.g,data.frame(snp,exposure,t(tmp$G)))
    
    elvl=c(0:1) ; glvl = c(0,1)
    tmp2 = NULL
    tmp2 = rbind(tmp2,data.frame(snp,exposure,t(tmp$GE)))
    
    colnames(tmp2) = c('snp','env',paste0('beta',elvl[-1],'p0'),paste0('se',elvl[-1],'p0'),
                       'beta0p1','beta0p2','se0p1','se0p2')
    tmp2 = format_res(tmp2)
    
    res.pool.e = rbind(res.pool.e,data.frame(snp,exposure,t(tmp$E)))
    colnames(res.pool.e) = c('snp',
                             'env',
                             paste0('beta1',elvl),paste0('se1',elvl))
    tmp2 <- format_res(res.pool.e)
    
    
    ORe.tab = matrix(as.character(unlist(c(tmp2[,paste0('OR1',elvl)]))),ncol=2)
    pe.tab  = matrix(as.character(unlist(c(tmp2[,paste0('Ppval1',elvl)]))),ncol=2,byrow=T)
    est2 <- rbind(ORe.tab[1,1],pe.tab[1,1],ORe.tab[1,2],pe.tab[1,2], NA, NA)
    
    final_out <- est %>% 
      dplyr::select(-c(4,5)) %>% 
      add_column(est2, .after = 3)
    
    colnames(final_out) <- c(paste0(ref_allele, ref_allele), 
                             paste0(ref_allele, alt_allele), 
                             paste0(alt_allele, alt_allele), 
                             paste0(alt_allele, " Allelic Dosage"), 
                             "N0", "N1", "N2")
    # this is only for FACTOR variables (might have to modify when you run Q4 variables)
    exposure_level <- levels(ds[,exposure])
    rownames(final_out) <- c(paste0(exposure,"=",exposure_level[1]), 
                             "p0",
                             paste0(exposure,"=",exposure_level[2]), 
                             "p1", 
                             exposure, 
                             "p")
    saveRDS(final_out, file = paste0(output_dir, "stratified_oddsratio_", snp, "_", exposure, "_dosage.rds"))
  } else {
    colnames(est) <- c(paste0(ref_allele, ref_allele), 
                       paste0(ref_allele, alt_allele), 
                       paste0(alt_allele, alt_allele), 
                       paste0(ref_allele, alt_allele), 
                       paste0(alt_allele, alt_allele),
                       "N0", "N1", "N2")
    exposure_level <- levels(ds[,exposure])
    rownames(est) <- c(paste0(exposure,"=",exposure_level[1]), 
                       "p0",
                       paste0(exposure,"=",exposure_level[2]), 
                       "p1", 
                       exposure, 
                       "p")
    
    saveRDS(est, file = paste0(output_dir, "stratified_oddsratio_", snp, "_", exposure, ".rds"))
  }
  
}





#' fit_stratified_or_q4
#'
#' use this one for 
#'
#' @param ds 
#' @param exposure 
#' @param snp 
#' @param hrc_version 
#' @param covariates 
#' @param mod 
#' @param dosage 
#' @param output_dir 
#'
#' @return
#' @export
#'
fit_stratified_or_q4 <- function(ds, exposure, snp, hrc_version, covariates, mod, dosage = F, output_dir) {
  # output_dir <- paste0("/media/work/gwis/posthoc/", exposure, "/")
  # snp_info <- unlist(strsplit(snp, split = "_"))
  # 
  # # ---- check D|G association, recode SNPs as necessary ---- #
  # dg_model <- lm(paste0("outcome ~ ", paste0(snp, "_dose"), "*", exposure, "+", paste0(covariates, collapse = "+")), data = ds)
  # if(dg_model$coefficients[2] < 0) {
  #   # flip dosages
  #   ds[, paste0(snp)] <- abs(ds[, paste0(snp)] - 2)
  #   ds[, paste0(snp, '_dose')] <- abs(ds[, paste0(snp, '_dose')] - 2)
  #   # flip genotype probabilities
  #   pp <- ds[,paste0(snp, "_p2")]
  #   ds[,paste0(snp, "_p2")] <- ds[, paste0(snp, "_p0")]
  #   ds[,paste0(snp, "_p0")] <- pp
  # }
  # 
  # # risk allele
  # if(dg_model$coefficients[2] < 0) {
  #   risk_allele = snp_info[3]
  # } else {
  #   risk_allele = snp_info[4]
  # }
  
  # output directory
  # output_dir <- paste0("/media/work/gwis/posthoc/", exposure, "/")
  
  # # SNP information
  # snp_info <- unlist(strsplit(snp, split = "_"))
  # 
  # # check allele frequency
  # total_dosage <- sum(ds[,snp])
  # total_alleles <- nrow(ds) * 2
  # aaf <- total_dosage / total_alleles
  # 
  # # ---- recode SNPs so that major allele is the reference group
  # if(aaf >= 0.5) {
  #   # flip dosages
  #   ds[, snp] <- abs(ds[, paste0(snp)] - 2)
  #   ds[, paste0(snp, '_dose')] <- abs(ds[, paste0(snp, '_dose')] - 2)
  #   # flip genotype probabilities
  #   pp <- ds[,paste0(snp, "_p2")]
  #   ds[,paste0(snp, "_p2")] <- ds[, paste0(snp, "_p0")]
  #   ds[,paste0(snp, "_p0")] <- pp
  #   # assign ref/alt allele
  #   ref_allele <- snp_info[4]
  #   alt_allele <- snp_info[3]
  # } else {
  #   ref_allele <- snp_info[3]
  #   alt_allele <- snp_info[4]
  # }
  # 
  # # create data subset
  # tmp1 = ds[, c('outcome', exposure, covariates)]
  # # tmp2 = ds[, grepl(snp, names(ds))]
  # tmp2 = ds[, grepl(paste(paste0(snp, c("_dose", "_p0", "_p1", "_p2")), collapse = "|"), names(ds))]
  # names(tmp2) <- c("dosage", "p0", "p1", "p2")
  # 
  # ds_tmp = cbind(tmp1, tmp2) %>% 
  #   na.omit(.[, c(exposure,'outcome', covariates, 'p1','p2','dosage')])
  
  
  
  # SNP information
  snp_info <- unlist(strsplit(snp, split = "_"))
  
  model_check <- glm(glue("outcome ~ {exposure}*{snp} + {glue_collapse(covariates, sep = '+')}"), family = 'binomial', data = ds)
  
  # ---- recode SNPs so that lower risk allele is reference (to match RERI output)
  if (model_check[[1]][snp] < 0) {
    # flip dosages
    ds[, snp] <- abs(ds[, paste0(snp)] - 2)
    ds[, paste0(snp, '_dose')] <- abs(ds[, paste0(snp, '_dose')] - 2)
    # flip genotype probabilities
    pp <- ds[,paste0(snp, "_p2")]
    ds[,paste0(snp, "_p2")] <- ds[, paste0(snp, "_p0")]
    ds[,paste0(snp, "_p0")] <- pp
    # assign ref/alt allele
    ref_allele <- snp_info[4]
    alt_allele <- snp_info[3]
  } else {
    ref_allele <- snp_info[3]
    alt_allele <- snp_info[4]
  }
  
  
  # create data subset
  tmp1 = ds[, c('outcome', exposure, covariates)]
  # tmp2 = ds[, grepl(snp, names(ds))]
  tmp2 = ds[, grepl(paste(paste0(snp, c("_dose", "_p0", "_p1", "_p2")), collapse = "|"), names(ds))]
  names(tmp2) <- c("dosage", "p0", "p1", "p2")
  
  ds_tmp = cbind(tmp1, tmp2) %>% 
    na.omit(.[, c(exposure,'outcome', covariates, 'p1','p2','dosage')])
  
  
  # start here.... #
  exposure_levels <- seq(1,4)
  res.pool.un = res.pool.g = res.pool.e = NULL
  Ncaco = data.frame(snps=snp,matrix(NA,ncol=6*4))
  colnames(Ncaco) = c('snps',paste0('Co',exposure_levels),paste0('Ca',exposure_levels),
                      paste0('Co1',exposure_levels),paste0('Ca1',exposure_levels),
                      paste0('Co2',exposure_levels),paste0('Ca2',exposure_levels))
  rownames(Ncaco) = snp
  
  # res.pool.un = res.pool.g = res.pool.e = NULL
  # Ncaco = data.frame(snps=snp,matrix(NA,ncol=6*2))
  # colnames(Ncaco) = c('snps',paste0('Co',1:2),paste0('Ca',1:2),
  #                     paste0('Co1',1:2),paste0('Ca1',1:2),
  #                     paste0('Co2',1:2),paste0('Ca2',1:2))
  # rownames(Ncaco) = snp
  
  
  #---- Calculate counts for each cell ----------------
  Ncaco[snp,c(paste0('Co',1:4),paste0('Ca',1:4))] = t(table(ds_tmp[,c('outcome',exposure)]))
  Ncaco[snp,c(paste0('Co1',1:4),paste0('Ca1',1:4))] = c(t(tapply(ds_tmp$p1, ds_tmp[,c('outcome',exposure)],sum,na.rm=T)))
  Ncaco[snp,c(paste0('Co2',1:4),paste0('Ca2',1:4))] = c(t(tapply(ds_tmp$p2, ds_tmp[,c('outcome',exposure)],sum,na.rm=T)))
  
  
  
  #-- Fit unrestricted model --------
  ds_tmp[,exposure] = factor(ds_tmp[,exposure]) # modeling q4 as factors
  tmp = as.vector(Model.all.new(ds_tmp,mod,exposure))
  res.pool.un = rbind(res.pool.un,data.frame(snp,exposure,t(tmp$GE)))
  res.pool.e = rbind(res.pool.e,data.frame(snp,exposure,t(tmp$E)))
  res.pool.g = rbind(res.pool.g,data.frame(snp,exposure,t(tmp$G)))
  
  ## organize results ######
  elvl=c(0:1) ; glvl = c(0,1,2)
  elvl=c(0:3) ; glvl = c(0,1,2)
  
  # elvl=c(0:3) ; glvl = c(0,1,2)
  

  colnames(res.pool.un) = c('snp','env',
                            paste0('beta',elvl[-1],'p0'),
                            paste0('se',elvl[-1],'p0'),
                            'beta0p1','beta0p2','se0p1','se0p2',
                            paste0('beta',elvl[-1],'p1'),paste0('se',elvl[-1],'p1'),
                            paste0('beta',elvl[-1],'p2'),paste0('se',elvl[-1],'p2'))
  res.pool.un = format_res(res.pool.un)
  
  
  ##== stratified by G results #######
  # colnames(res.pool.g) = c('snp','env',paste0(rep(c('beta0','se0','beta1','se1','beta2','se2'),each=1),
  #                                             rep(elvl[-1],6)))
  colnames(res.pool.g) = c('snp','env',
                           paste0('beta',elvl[-1], 'p0'), 
                           paste0('se',elvl[-1], 'p0'),
                           paste0('beta',elvl[-1],'p1'),
                           paste0('se',elvl[-1],'p1'),
                           paste0('beta',elvl[-1],'p2'),
                           paste0('se',elvl[-1],'p2'))
  res.pool.g = format_res(res.pool.g)
  
  ##== stratified by E results #######
  colnames(res.pool.e) = c('snp','env',
                           paste0('beta1',elvl),
                           paste0('se1',elvl),
                           paste0('beta2',elvl),
                           paste0('se2',elvl))
  res.pool.e = format_res(res.pool.e)
  
  ##== Put into table 
  OR.tab = ORtab(snp,elvl=elvl,glvl=glvl,res=res.pool.un)
  pval.tab = ptab(snp,elvl=elvl,glvl=glvl,res=res.pool.un)
  
  ORg.tab = matrix(as.character(unlist(c(as.vector(res.pool.g[,paste0('OR',elvl[-1],'p0')]),
                                         as.vector(res.pool.g[,paste0('OR',elvl[-1],'p1')]),
                                         as.vector(res.pool.g[,paste0('OR',elvl[-1],'p2')])))), ncol=3)
  pg.tab = matrix(as.character(unlist(c(as.vector(res.pool.g[,paste0('Ppval',elvl[-1],'p0')]),
                                        as.vector(res.pool.g[,paste0('Ppval',elvl[-1],'p1')]),
                                        as.vector(res.pool.g[,paste0('Ppval',elvl[-1],'p2')])))),ncol=3)
  colnames(ORg.tab) = colnames(pg.tab) = c(paste0('p',glvl))
  
  
  ORe.tab = matrix(as.character(unlist(c(res.pool.e[,paste0('OR1',elvl)],
                                         res.pool.e[,paste0('OR2',elvl)]))),ncol=2)
  pe.tab  = matrix(as.character(unlist(c(res.pool.e[,paste0('Ppval1',elvl)],
                                         res.pool.e[,paste0('Ppval2',elvl)]))),ncol=2,byrow=T)
  
  #== calculate counts for G=0 and put counts into format ca/co
  for(i in 1:4){
    eval(parse(text=paste0('Ncaco$caco0',i,"=paste0(round(Ncaco$Ca",i,'-Ncaco$Ca1',i,'-Ncaco$Ca2',i,
                           ",1),'/',round(Ncaco$Co",i,'-Ncaco$Co1',i,'-Ncaco$Co2',i,',1))')))
    eval(parse(text=paste0('Ncaco$caco1',i,'=paste0(round(Ncaco$Ca1',i,",1),'/',round(Ncaco$Co1",i,",1))")))
    eval(parse(text=paste0('Ncaco$caco2',i,'=paste0(round(Ncaco$Ca2',i,",1),'/',round(Ncaco$Co2",i,",1))")))
  }
  
  #=== write into table
  # est = cbind(rbind(OR.tab[1,],pval.tab[1,],OR.tab[2,],pval.tab[2,],
  #                   ORg.tab[1,],pg.tab[1,]),
  #             rbind(ORe.tab[1,],pe.tab[1,],ORe.tab[2,],pe.tab[2,],rep(NA,2),rep(NA,2)),
  #             N0 = c(Ncaco[snp,'caco01'],NA,Ncaco[snp,'caco02'],rep(NA,3)),
  #             N1 = c(Ncaco[snp,'caco11'],NA,Ncaco[snp,'caco12'],rep(NA,3)),
  #             N2 = c(Ncaco[snp,'caco21'],NA,Ncaco[snp,'caco22'],rep(NA,3)))
  # 
  
  est = cbind(rbind(OR.tab[1,],pval.tab[1,],
                    OR.tab[2,],pval.tab[2,],
                    OR.tab[3,],pval.tab[3,],
                    OR.tab[4,],pval.tab[4,],
                    ORg.tab[1,],pg.tab[1,],
                    ORg.tab[2,],pg.tab[2,],
                    ORg.tab[3,],pg.tab[3,]),
              rbind(ORe.tab[1,],pe.tab[1,],
                    ORe.tab[2,],pe.tab[2,],
                    ORe.tab[3,],pe.tab[3,],
                    ORe.tab[4,],pe.tab[4,],
                    rep(NA,2),rep(NA,2),rep(NA,2),rep(NA,2),rep(NA,2),rep(NA,2)),
              N0 = c(Ncaco[snp,'caco01'],NA,Ncaco[snp,'caco02'],NA,Ncaco[snp,'caco03'],NA,Ncaco[snp,'caco04'],rep(NA,7)),
              N1 = c(Ncaco[snp,'caco11'],NA,Ncaco[snp,'caco12'],NA,Ncaco[snp,'caco13'],NA,Ncaco[snp,'caco14'],rep(NA,7)),
              N2 = c(Ncaco[snp,'caco21'],NA,Ncaco[snp,'caco22'],NA,Ncaco[snp,'caco23'],NA,Ncaco[snp,'caco24'],rep(NA,7)))
  
  # colnames(est) <- c("G=0", "G=1", "G=2", "G=1", "G=2", "N0", "N1", "N2")
  # rownames(est) <- c("E=0", "p0", "E=1", "p1", "E=2", "p2", "E=3", "p3", "E1", "P01", "E2", "P02", "E3", "P03")
  
  colnames(est) <- c(paste0(ref_allele, ref_allele), 
                     paste0(ref_allele, alt_allele), 
                     paste0(alt_allele, alt_allele), 
                     paste0(ref_allele, alt_allele), 
                     paste0(alt_allele, alt_allele),
                     "N0", "N1", "N2")
  exposure_level <- levels(as.factor(ds[,exposure]))
  rownames(est) <- c(paste0(exposure,"=",exposure_level[1]), 
                     "p0",
                     paste0(exposure,"=",exposure_level[2]), 
                     "p1", 
                     paste0(exposure,"=",exposure_level[3]), 
                     "p2",
                     paste0(exposure,"=",exposure_level[4]), 
                     "p3",
                     paste0(exposure,"=",exposure_level[2], "(G)"), 
                     "p1 (G)", 
                     paste0(exposure,"=",exposure_level[3], "(G)"), 
                     "p2 (G)",
                     paste0(exposure,"=",exposure_level[4], "(G)"), 
                     "p3 (G)")
  
  
  
  # options(knitr.kable.NA = '')
  # 
  # kable(est) %>%
  #   kable_styling('bordered', bootstrap_options = c("striped", "hover", "condensed"), full_width = F, position = 'left') %>% 
  #   add_header_above(c(" " = 4, "G param by E" = 2, "Counts (Ca/Co)" = 3)) %>% 
  #   pack_rows("E param by G", 5, 6, indent = F) %>% 
  #   footnote(general = paste0("Risk allele - ", risk_allele)) %>% 
  #   save_kable(file = paste0(output_dir, "stratified_oddsratio_", snp, "_", exposure, ".html"), self_contained = T)
  
  saveRDS(est, file = paste0(output_dir, "stratified_oddsratio_", snp, "_", exposure, ".rds"))
  # return(est)
  
}
KimAE/figifs documentation built on March 10, 2021, 9:31 p.m.