source("R/symmetrcize.R") source("R/estimate_betas.R") ld_matrix = as.matrix(fread("data/LDcorr_452_5523.ld", header=F)) ld_matrix = symmetricize(ld_matrix) frequencies = read.table("data/CM6_Cauc-NEK1_imputed-LDcorr5523.snpdat", header=T) corr = read.table("data/CM6_Cauc-NEK1_imputed-plink452_5523_GCTA_no_covar.ma", header=T) freq_af = merge(corr,frequencies,by=1) freq_af = freq_af[order(freq_af$POS),] new_betas = logistic_to_linear(b = freq_af$beta, freq_af$Z,phi = 0.670354, ref_af=freq_af$FREQ1) freq_af$beta = new_betas[,1] freq_af$se = new_betas[,2] alpha = log(.67/(1-.67))

library(plink2R) dat = read_plink("data/CM6_Cauc-NEK1_imputed_plink452_5523")

freq_af$VAR = apply(dat$bed,2,function(x){var,na.rm=T})

freq_af$N = freq_af$N - t(t(apply(dat$bed,2,function(x){sum(is.na(x))}))) #alpha = log(.33/(1-.33)) freq_af$VAR = apply(dat$bed,2,function(x){var(x,na.rm=T)}) betas = freq_af$beta*exp(alpha)/((1+ exp(alpha))^2) res_preparation = prep_dataset_common(data_set = freq_af,ld_matrix= ld_matrix,hwe_variance = T) stepwise_results2 = stepwise_conditional_run(res_preparation = res_preparation,p_value_threshold = 9.186111e-06,colinear_threshold = 0.9) all_but_one_df = all_but_one(res_preparation=res_preparation,stepwise_results = stepwise_results ) res_preparation = prep_dataset_common(data_set = freq_af,ld_matrix= ld_matrix,hwe_variance = T) conditional_from_ids(rsids = c("rs10520157","rs6831487"),res_preparation,return_only_these = T) df$res_step$p_new dos_data = setDF(fread("data/NEK1_Cauc_452_dos.txt", header=T)) rsids =10:ncol(dos_data)

dos_data$RANDOM = dos_data$ENSG00000137601 + rnorm(n=nrow(dos_data),0,2) dos_data$rs10520157_random = dos_data$ENSG00000137601 + rnorm(n=nrow(dos_data),0,0.05)

dos_data$RANDOM_BIN = rbinom(dos_data$ENSG00000137601 > 6,0.7,1)

dos_data$RANDOM_BIN = dos_data$ENSG00000137601 > 6

dos_data$RANDOM_BIN = ifelse(rbinom(n=nrow(dos_data),1,0.9), dos_data$RANDOM_BIN, 1- dos_data$RANDOM_BIN)

betas = c() ses = c() m1 = summary(lm(dos_data$RANDOM_BIN ~ dos_data$rs10520157 )) (m1)
m1 = summary(glm(dos_data$RANDOM_BIN ~ dos_data$rs10520157, family=binomial )) (m1)
for(i in rsids){ m1 = summary(glm(dos_data$RANDOM_BIN ~ dos_data$rs10520157 + dos_data[,i], family=binomial)) if (colnames(dos_data)[i] != "rs10520157"){ betas = c(betas,coef(m1)[3,1]) ses = c(ses,coef(m1)[3,2]) }else{ betas = c(betas,NA) ses = c(ses,NA) } } beta_unadj =c() se_unadj = c() for(i in rsids){ m1 = summary(glm(dos_data$RANDOM_BIN ~ dos_data[,i], family=binomial)) beta_unadj = c(beta_unadj,coef(m1)[2,1]) se_unadj = c(se_unadj,coef(m1)[2,2]) } f = (cbind(beta_unadj,betas)) f = f[!is.na(betas),] cor(f,method ="pearson")

64.5 %correlated

gg = table(dos_data$RANDOM_BIN)[1]/sum(table(dos_data$RANDOM_BIN)) freq_af$beta = beta_unadj freq_af$se = se_unadj freq_af$Z = beta_unadj/se_unadj res_preparation= prep_dataset_common(data_set = freq_af,ld_matrix= ld_matrix,hwe_variance = F) stepwise_results2 = stepwise_conditional_run(res_preparation = res_preparation,p_value_threshold = 1e-4,colinear_threshold = 0.9) df = conditional_from_ids(rsids = "rs10520157", res_preparation) g = cbind(df$res_step$beta_new/df$res_step$se_new, betas/ses) g = g[!is.na(g[,1]),] cor(g[,1],g[,2],method="spearman") summary(lm(g[,2] ~ g[,1]))

all_but_one_df = all_but_one(res_preparation=res_preparation,stepwise_results = stepwise_results )

res_preparation = prep_dataset_common(data_set = freq_af,ld_matrix= ld_matrix,hwe_variance = F) stepwise_results2 = stepwise_conditional_run(res_preparation = res_preparation,p_value_threshold = 9.186111e-06,colinear_threshold = 0.9)

g = cbind(df$res_step$beta_new/df$res_step$se_new, betas/ses) g = g[!is.na(g[,1]),] cor(g[,1],g[,2],method="spearman") summary(lm(g[,2] ~ g[,1]))

freq_af = merge(corr,frequencies,by=1)

freq_af = freq_af[order(freq_af$POS),] f new_betas = logistic_to_linear(b = freq_af$beta, freq_af$Z,phi = gg, ref_af=freq_af$FREQ1) freq_af$beta = new_betas[,1] freq_af$se = new_betas[,2] freq_af$Z = freq_af$beta/freq_af$se res_preparation = prep_dataset_common(data_set = freq_af,ld_matrix= ld_matrix,hwe_variance = F) stepwise_results2 = stepwise_conditional_run(res_preparation = res_preparation,p_value_threshold = 9.186111e-06,colinear_threshold = 0.9) df = conditional_from_ids(rsids = "rs10520157", res_preparation) g = cbind(df$res_step$beta_new, betas)

g = g[!is.na(g[,1]),] cor(g[,1],g[,2], method="spearman") g3 = linear_to_logistic(b = df$res_step$beta_new,df$res_step$se_new, phi=gg, theta=freq_af$FREQ1) g = cbind(betas/ses, g3[,1]/g3[,2]) g = g[!is.na(g[,1]),] cor(g[,1],g[,2], method="spearman")



theboocock/coco documentation built on May 31, 2019, 9:10 a.m.