In this file, I start generating the Bayesian Network from our data using the gRain
package.
knitr::opts_chunk$set(echo = TRUE) library(gRain) library(tidyverse) library(data.table) ## load in data generation scripts library(cvdRiskData)
The first variable we add is age
. Then I add smoking
, which is associated with hypertension.
yn <- c("N","Y") #this is the true conditional prob table for HTNtreat given HTN status #condHTNtreat <- t(HTNtreat/rowSums(HTNtreat)) #I adjusted according to David's suggestions condHTNtreat <- t(matrix(nrow=2,c(100,0,40,60), byrow = TRUE, dimnames=list(yn,yn))) ## Age probabilities are calculated from our cohort ageLevels <- c("0-20","20-40", "40-55", "55-70","70-90") ageProbMat <- read.csv(system.file("cpt", "age_probabilities.csv", package="cvdRiskData")) ageProb <- ageProbMat$Freq names(ageProb) <- ageProbMat$out ageProb a <- cptable(~age, values=ageProb*100, levels=ageLevels) ## These conditional probabilities are derived from our cohort condHTNAgeprob <- read.csv(system.file("cpt", "age_htn_cpt.csv", package="cvdRiskData"), check.names = FALSE) ## Here are the conditional age|htn probabilities condHTNAgeprob rownames(condHTNAgeprob) <- c(0,1) colnames(condHTNAgeprob) <- ageLevels ## Compile our CPTs so far a.htn <- cptable(~htn+age, values=as.matrix(condHTNAgeprob), levels=yn) htn.treat <- cptable(~treat+htn, values=condHTNtreat, levels=yn) ##Smoking|age CPT - peaks at 25% smoking for 55-70 category a.s <- cptable(~smoking+age, values=c(95,5,90,10,85,15,75,25,90,10),levels=yn) ##Smoking htn, if you smoke, you have 67% prob of having hypertension s.h <- cptable(~htn+smoking, values=c(50,50,33,67))
Now we load in probabilities for race, which was calculated from our patient cohort.
pRaceMat <- read.csv(system.file("cpt", "race_probabilities.csv", package="cvdRiskData")) pRace <- pRaceMat$Freq names(pRace) <- pRaceMat$Var1 pRace race <- cptable(~race, values=pRace, levels=names(pRace)) #These probabilities are estimated for US patients overall (from CDC report) bmiLevels <- c("15-18","18-25", "25-31","31+") bmiProb <- c(0.16, 0.68, 0.10, 0.06) bmi <- cptable(~bmi, values=bmiProb, levels=bmiLevels) #conditional CPT for BMI given race #these values were estimated from CDC report #If you are Black/AfAm or White, have a higher #probability of being in a higher BMI category valuesBMI <- c(97, 3, 98, 1, 98, 2, 99, 1, 88, 12, 92,8, 92, 8, 96, 4, 55, 45, 70, 30, 70, 30, 85, 15, 20, 80, 33, 66, 33, 66, 65, 35) ##t2d.race.bmi is a joint cpt: p(t2d|race,bmi) t2d.race.bmi <- cptable(~t2d|race:bmi, values=c(valuesBMI), levels=yn) #t2d.cvd <- cptable(~cvd|t2d, values=c(95, 5, 5, 95), levels= yn) ##incidence of smoking is 15% smoking <- cptable(~smoking, values=c(85, 15), levels=yn)
These first 2 snps are on the same chromosome and so I will model them as having identical risk, and having both does not increase risk. I am limiting myself to the homozygous variants (excluding the heterozygous variants) in order to simplify things. I also only consider 6 genotypes overall.
Rs10757278 (Chr 9, 22124478). (A;A) 0.78 x risk of heart disease, (A;G) 1.3 x risk, (G;G) 1.6x risk for heart disease. Co-morbidity: diabetes
Rs1333049 (Chr 9, 22125504). (G;G) Normal (WT); (C;G) 1.5 increased risk; (C;C) 1.9x increased risk.
These last 2 snps are on different chromosomes for the first. Again, limiting to the homozygous cases to simplify things.
Rs4665058, 4x risk if have two copies of the SNP (A;A), 2 X (A;C) and (C;C) - wild type. Chr 2, 159333698
rs8055236 (Chr 16, 83178793) (T;T) Normal (WT); (G;T) 1.9x risk, (G;G) 2.2x increased risk
raceNames <- names(pRace) #Conversion table between aggregate genotype and SNP values snpLookup <- list("0000" = c("AA", "CC", "CC", "TT"), "0001" = c("AA", "CC", "CC", "GG"), "0010" = c("AA", "CC", "AA", "TT"), "1100" = c("GG", "GG", "CC", "TT"), "1110" = c("GG", "GG", "AA", "GG"), "1111" = c("GG", "GG", "AA", "GG")) snpNames <- c("rs10757278", "rs1333049", "rs4665058", "rs8055236") snpLookup <- lapply(snpLookup, function(x) {names(x) <- c("rs10757278", "rs1333049", "rs4665058", "rs8055236"); return(x)}) rs10757278race = matrix(nrow=2, byrow = TRUE, data=c(21, 24, 80, 30, 79, 76, 20, 70), dimnames = list( c("AA","GG"),raceNames))/100 r1 <- rs10757278race #p("AA"|race) = c(10, 10, 10, 10) #p("CC"|race) = c(80,80,80,80) rs4665058race = matrix(nrow=2, data= c(10, 10, 10, 10, 90,90,90,90), byrow=TRUE, dimnames= list(c("AA","CC"), raceNames ))/100 r2 <- rs4665058race #p("GG"|race) = c(62, 65, 20, 62) #P("TT"|race) = c(38, 35, 80, 38) rs8055236race = matrix(nrow=2, byrow=TRUE, data=c(62, 65, 20, 40, 38, 35, 80, 60), dimnames = list(c("GG","TT"), raceNames))/100 r3 <- rs8055236race genotypes <- c("1111", "1110", "1100", "0010", "0001", "0000") probList <- list() probList[["0000"]] <- r1[2,] * r2[2,] * r3[2,] probList[["1100"]] <- r1[1,] * r2[2,] * r3[2,] probList[["1110"]] <- r1[1,] * r2[1,] * r3[2,] probList[["0010"]] <- r1[2,] * r2[1,] * r3[2,] probList[["0001"]] <- r1[2,] * r2[2,] * r3[1,] probList[["1111"]] <- r1[1,] * r2[1,] * r3[1,] cptgenoRace <- do.call(rbind,probList) cptgenoRace <- cptgenoRace[genotypes,] race.genotype <- cptable(~genotype|race, values=cptgenoRace, levels=genotypes, normalize=TRUE) ##Gender is unbalanced gender <- cptable(~gender, c(45,55), levels=c("M","F")) ##Two SNPs are associated with higher cholesterol tcholStates <- c("<160", "160-199", "200-239","240+") genotype.tchol <- cptable(~tchol|genotype, values = c(5,10,30,55, 10,20,30,40, 15,20,45,30, 15,20,45,30, 30,60,5,5, 30,60,5,5), levels=tcholStates)
Finally, we compile our Bayesian Network and save it.
plist <- compileCPT(list(a, a.htn, htn.treat, smoking, s.h, race, bmi, t2d.race.bmi, race.genotype, genotype.tchol, gender)) cvd_bayes_net <- compile(grain(plist)) devtools::use_data(cvd_bayes_net, overwrite = TRUE)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.