# R/gewistLevene.r In GEWIST: Gene Environment Wide Interaction Search Threshold

#### Documented in gewistLevene

```################################## GENE X ENVIRONMENT_FIXED EFFECT SIZES #################

# Outputs the optimal prioritization threshold for testing genetic interactions

gewistLevene <- function(p, N, theta_gc, theta_c, M, K = 20000,verbose=FALSE){

## Input the known parameters (variance explained ~ (0,1))
## Assume Gene x Environment

### check inputs

if (p > 0.5 | p <= 0) stop("minor allele frequency should be a number between 0 and 0.5")

if (!(N > 0 | M > 0 | K > 0 | theta_gc >0 | theta_c > 0)) stop( "negative input values are not allowed")

if (!(theta_gc < 1 | theta_c < 1 )) stop(" variance explained should be a number between 0 and 1 ")

N <- round(N)
M <- round(M)
K <- round(K)

### calculate beta coefficients

b2 <- sqrt(( theta_c )/( 1 - theta_gc - theta_c ))

b3 <- sqrt(theta_gc/(2*p*(1 - p)*( 1 - theta_gc - theta_c )))

### sample sizes

n1 <- round(N*(1 - p)^2)
n2 <- round(N*(1 - p)*p*2)
n3 <- N - n1 - n2

############ sample the variance of covariate and error per genotype

var_C_G_1 <- rchisq(K,n1 - 1)/(n1 - 1)
var_C_G_2 <- rchisq(K,n2 - 1)/(n2 - 1)
var_C_G_3 <- rchisq(K,n3 - 1)/(n3 - 1)

error_G_1 <- rchisq(K,n1 - 1)/(n1 - 1)
error_G_2 <- rchisq(K,n2 - 1)/(n2 - 1)
error_G_3 <- rchisq(K,n3 - 1)/(n3 - 1)

########### sample the b_x's

b_x1 <- rnorm(K,b2 + b3*(   - 2*p), sqrt(1/(var_C_G_1*(n1 - 1))))
b_x2 <- rnorm(K,b2 + b3*(1 - 2*p),sqrt(1/(var_C_G_2*(n2 - 1))))
b_x3 <- rnorm(K,b2 + b3*(2 - 2*p),sqrt(1/(var_C_G_3*(n3 - 1))))

########### the covariance of error term and covariate per genotype

cov_error_C_1 <- (b_x1 - (b2 + b3*(   - 2*p)))*var_C_G_1
cov_error_C_2 <- (b_x2 - (b2 + b3*(1 - 2*p)))*var_C_G_2
cov_error_C_3 <- (b_x3 - (b2 + b3*(2 - 2*p)))*var_C_G_3

sum_of_C_square_G <- var_C_G_1*n1*( - 2*p) + var_C_G_2*n2*(1 - 2*p) + var_C_G_3*n3*(2 - 2*p)
sum_of_C_G_square <- var_C_G_1*n1*( - 2*p)^2 + var_C_G_2*n2*(1 - 2*p)^2 + var_C_G_3*n3*(2 - 2*p)^2
sum_C_2 <- var_C_G_1*n1 + var_C_G_2*n2 + var_C_G_3*n3

total_cov_error <- ( - 2*p)*n1*cov_error_C_1 + (1 - 2*p)*n2*cov_error_C_2 + (2 - 2*p)*n3*cov_error_C_3
total_cov	 <- n1*cov_error_C_1 + n2*cov_error_C_2 + n3*cov_error_C_3

############ observed b2s

beta2 <- b2 + (sum_of_C_square_G*total_cov_error + total_cov*sum_of_C_G_square)/(sum_C_2*sum_of_C_G_square + sum_of_C_square_G^2)
beta3 <- b3 + ((b2 - beta2)*sum_of_C_square_G + total_cov_error)/(sum_of_C_G_square)

########### Thus we can obtain the total variance per genotype

var_group_1 <- abs(var_C_G_1*(b2 + b3*(   - 2*p))^2 + error_G_1 + 2*cov_error_C_1*(b2 + b3*(   - 2*p)))
var_group_2 <- abs(var_C_G_2*(b2 + b3*(1 - 2*p))^2 + error_G_2 + 2*cov_error_C_2*(b2 + b3*(1 - 2*p)))
var_group_3 <- abs(var_C_G_3*(b2 + b3*(2 - 2*p))^2 + error_G_3 + 2*cov_error_C_3*(b2 + b3*(2 - 2*p)))

vaj_1 <- ((b2 + b3*(   - 2*p))^2 + 1)*((n1 - 1) - 2*(gamma(1/2)/beta(1/2,(n1 - 1)/2))^2)/(n1 - 1)
vaj_2 <- ((b2 + b3*(1 - 2*p))^2 + 1)*((n2 - 1) - 2*(gamma(1/2)/beta(1/2,(n2 - 1)/2))^2)/(n2 - 1)
vaj_3 <- ((b2 + b3*(2 - 2*p))^2 + 1)*((n3 - 1) - 2*(gamma(1/2)/beta(1/2,(n3 - 1)/2))^2)/(n3 - 1)

sd_sam1 <- sqrt(abs((pi - 2)*(var_group_1)/(pi*(n1 - 1)) - 2/pi*vaj_1))
sd_sam2 <- sqrt(abs((pi - 2)*(var_group_2)/(pi*(n2 - 1)) - 2/pi*vaj_2))
sd_sam3 <- sqrt(abs((pi - 2)*(var_group_3)/(pi*(n3 - 1)) - 2/pi*vaj_3))

z_value1 <- rnorm(K,sqrt(2*(var_group_1)/pi),sd_sam1)
z_value2 <- rnorm(K,sqrt(2*(var_group_2)/pi),sd_sam2)
z_value3 <- rnorm(K,sqrt(2*(var_group_3)/pi),sd_sam3)

z_value <- (n1*z_value1 + n2*z_value2 + n3*z_value3)/N

denominator  <- 2*((n1 - 1)*var_group_1*(1 - 2/pi) + (n2 - 1)*var_group_2*(1 - 2/pi) + (n3 - 1)*var_group_3*(1 - 2/pi))
numerator   <- (N - 3)*((n1)*(z_value1 - z_value)^2 + (n2)*(z_value2 - z_value)^2 + (n3)*(z_value3 - z_value)^2)

levene_p <- 1 - pf(numerator/denominator,df1 = 2,df2 = N - 3)

############ Calculate interation p values

RSS1 <- error_G_1 + (b2 + b3*(   - 2*p) - beta2 - beta3*(    -2*p))^2*var_C_G_1 + 2*(b2 + b3*(    -2*p) - beta2 - beta3*(   - 2*p))*cov_error_C_1
RSS2 <- error_G_2 + (b2 + b3*(1 - 2*p) - beta2 - beta3*(1 - 2*p))^2*var_C_G_2 + 2*(b2 + b3*(1 - 2*p) - beta2 - beta3*(1 - 2*p))*cov_error_C_2
RSS3 <- error_G_3 + (b2 + b3*(2 - 2*p) - beta2 - beta3*(2 - 2*p))^2*var_C_G_3 + 2*(b2 + b3*(2 - 2*p) - beta2 - beta3*(2 - 2*p))*cov_error_C_3

f_stats <- 2*beta3^2*p*(1 - p)*(N - 4)^2/(RSS1*(n1 - 1) + RSS2*(n2 - 1) + RSS3*(n3 - 1))

interaction_p <- 1 - pf(f_stats,df1 = 1,df2 = N - 4)

########## Calculate powers

result <- data.frame("p-value_Cut-offs" = NA, "VP_power" = NA)

for (i in 1:1000){
power <- mean(levene_p < i/1000 & interaction_p<0.05/(M*i/1000))
result[i,] <- c( i/1000,power)
}

conv_power <- power
optimal_power <- max(result[,2])
optimal_p_threshold <- which.max(result[,2])/1000

if (verbose){return(result)
}else {

return(list("Conventional_power"=conv_power,
"Optimal_VP_power"=optimal_power,"Optimal_pval_threshold"=optimal_p_threshold))

}
}

############## End of Script
```

## Try the GEWIST package in your browser

Any scripts or data that you put into this service are public.

GEWIST documentation built on Nov. 8, 2020, 5:46 p.m.