Description Author(s) References See Also Examples
The following code is the best starting point for those who want to perform cis-/trans-eQTL analysis with Matrix eQTL.
Andrey A Shabalin andrey.shabalin@gmail.com
The package website: http://www.bios.unc.edu/research/genomic_software/Matrix_eQTL/
See Matrix_eQTL_engine
for reference and other sample code.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 | # Matrix eQTL by Andrey A. Shabalin
# http://www.bios.unc.edu/research/genomic_software/Matrix_eQTL/
#
# Be sure to use an up to date version of R and Matrix eQTL.
# source("Matrix_eQTL_R/Matrix_eQTL_engine.r");
library(MatrixEQTL)
## Location of the package with the data files.
base.dir = find.package('MatrixEQTL');
## Settings
# Linear model to use, modelANOVA, modelLINEAR, or modelLINEAR_CROSS
useModel = modelLINEAR; # modelANOVA, modelLINEAR, or modelLINEAR_CROSS
# Genotype file name
SNP_file_name = paste0(base.dir, "/data/SNP.txt");
snps_location_file_name = paste0(base.dir, "/data/snpsloc.txt");
# Gene expression file name
expression_file_name = paste0(base.dir, "/data/GE.txt");
gene_location_file_name = paste0(base.dir, "/data/geneloc.txt");
# Covariates file name
# Set to character() for no covariates
covariates_file_name = paste0(base.dir, "/data/Covariates.txt");
# Output file name
output_file_name_cis = tempfile();
output_file_name_tra = tempfile();
# Only associations significant at this level will be saved
pvOutputThreshold_cis = 2e-2;
pvOutputThreshold_tra = 1e-2;
# Error covariance matrix
# Set to numeric() for identity.
errorCovariance = numeric();
# errorCovariance = read.table("Sample_Data/errorCovariance.txt");
# Distance for local gene-SNP pairs
cisDist = 1e6;
## Load genotype data
snps = SlicedData$new();
snps$fileDelimiter = "\t"; # the TAB character
snps$fileOmitCharacters = "NA"; # denote missing values;
snps$fileSkipRows = 1; # one row of column labels
snps$fileSkipColumns = 1; # one column of row labels
snps$fileSliceSize = 2000; # read file in slices of 2,000 rows
snps$LoadFile(SNP_file_name);
## Load gene expression data
gene = SlicedData$new();
gene$fileDelimiter = "\t"; # the TAB character
gene$fileOmitCharacters = "NA"; # denote missing values;
gene$fileSkipRows = 1; # one row of column labels
gene$fileSkipColumns = 1; # one column of row labels
gene$fileSliceSize = 2000; # read file in slices of 2,000 rows
gene$LoadFile(expression_file_name);
## Load covariates
cvrt = SlicedData$new();
cvrt$fileDelimiter = "\t"; # the TAB character
cvrt$fileOmitCharacters = "NA"; # denote missing values;
cvrt$fileSkipRows = 1; # one row of column labels
cvrt$fileSkipColumns = 1; # one column of row labels
if(length(covariates_file_name)>0) {
cvrt$LoadFile(covariates_file_name);
}
## Run the analysis
snpspos = read.table(snps_location_file_name, header = TRUE, stringsAsFactors = FALSE);
genepos = read.table(gene_location_file_name, header = TRUE, stringsAsFactors = FALSE);
me = Matrix_eQTL_main(
snps = snps,
gene = gene,
cvrt = cvrt,
output_file_name = output_file_name_tra,
pvOutputThreshold = pvOutputThreshold_tra,
useModel = useModel,
errorCovariance = errorCovariance,
verbose = TRUE,
output_file_name.cis = output_file_name_cis,
pvOutputThreshold.cis = pvOutputThreshold_cis,
snpspos = snpspos,
genepos = genepos,
cisDist = cisDist,
pvalue.hist = TRUE,
min.pv.by.genesnp = FALSE,
noFDRsaveMemory = FALSE);
unlink(output_file_name_tra);
unlink(output_file_name_cis);
## Results:
cat('Analysis done in: ', me$time.in.sec, ' seconds', '\n');
cat('Detected local eQTLs:', '\n');
show(me$cis$eqtls)
cat('Detected distant eQTLs:', '\n');
show(me$trans$eqtls)
## Make the histogram of local and distant p-values
plot(me)
|
Rows read: 15 done.
Rows read: 10 done.
Rows read: 2 done.
Matching data files and location files
10 of 10 genes matched
15 of 15 SNPs matched
Task finished in 0.004 seconds
Processing covariates
Task finished in 0.003 seconds
Processing gene expression data (imputation, residualization, etc.)
Task finished in 0.006 seconds
Creating output file(s)
Task finished in 0.011 seconds
Performing eQTL analysis
100.00% done, 2 cis-eQTLs, 3 trans-eQTLs
Task finished in 0.013 seconds
Analysis done in: 0.028 seconds
Detected local eQTLs:
snps gene statistic pvalue FDR beta
1 Snp_05 Gene_03 38.812160 5.515519e-14 5.515519e-12 0.4101317
2 Snp_04 Gene_10 3.201666 7.608981e-03 3.804491e-01 0.2321123
Detected distant eQTLs:
snps gene statistic pvalue FDR beta
1 Snp_13 Gene_09 -3.914403 0.002055817 0.1027908 -0.2978847
2 Snp_11 Gene_06 -3.221962 0.007327756 0.1619451 -0.2332470
3 Snp_14 Gene_01 3.070005 0.009716705 0.1619451 0.2147077
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.