# R/genotype.R
#
# Defines functions: genall, anSNP, goodSNP, genus
#
# Documented in: anSNP, genall, genus, goodSNP

# genotyping for 450k

# Convert a vector of betas (one probe, many samples) to numeric genotypes.
#
# The values are clustered with kmeans(), seeded with one initial centre per
# expected genotype. Cluster labels follow the order of the initial centres,
# so with the default peaks a high beta (allele A, seeded at 0.8) is coded 1,
# the cluster seeded at 0.5 is coded 2 and the cluster seeded at 0.2 is
# coded 3.
#
# Arguments:
#   y      vector of betas; coerced with as.numeric().
#   peaks  initial cluster centres handed to kmeans().
#
# Value:
#   An integer vector the same length as y holding the cluster label of each
#   element. If kmeans() signals an error (for example because y contains
#   missing values or too few points for the requested centres) every element
#   is NA.
genme <- function(y, peaks = c(0.8, 0.5, 0.2)) {
  fit <- tryCatch(
    kmeans(as.numeric(y), centers = peaks),
    # A probe that cannot be clustered is reported as "no call", not an error.
    error = function(e) NULL
  )
  if (is.null(fit)) {
    # NA_integer_ keeps the return type the same as the success branch.
    return(rep(NA_integer_, length(y)))
  }
  # Use the full component name; `$clus` only worked via partial matching.
  fit$cluster
}

# Produce numeric genotypes from a matrix or data frame of betas.
#
# Each row of m is genotyped independently with genme().
#
# Arguments:
#   m  matrix or data frame of betas; rows are the units handed to genme()
#      (probes, when called from genall()).
#
# Value:
#   An integer matrix with the same dimensions and dimnames as m; row i holds
#   genme() applied to row i of m (NA where genme() could not make a call).
genus <- function(m) {
  m <- as.matrix(m)
  # Pre-allocating with m's dimnames keeps the column names and guarantees an
  # nrow(m) x ncol(m) result even for a single column; t(apply(m, 1, genme))
  # loses the column names and flips the shape when m has only one column.
  calls <- matrix(
    NA_integer_,
    nrow = nrow(m),
    ncol = ncol(m),
    dimnames = dimnames(m)
  )
  for (i in seq_len(nrow(m))) {
    calls[i, ] <- genme(m[i, ])
  }
  calls
}

# Good SNP list from the KS screen.
#
# Takes no arguments and returns a fixed character vector of 240 "cg" probe
# identifiers. genall() uses it, together with anSNP(), as its default set of
# loci to genotype.
goodSNP <- function() {
  probe_ids <- c(
    "cg09885502", "cg04131969", "cg25099095", "cg26077133", "cg26820259",
    "cg05338731", "cg04990378", "cg11420142", "cg14859874", "cg26705599",
    "cg02658043", "cg20976286", "cg11400162", "cg18828306", "cg25465065",
    "cg02464073", "cg21587006", "cg06032337", "cg02113055", "cg13078798",
    "cg25388952", "cg01350803", "cg18861767", "cg05900567", "cg09281805",
    "cg13251750", "cg15800276", "cg03900028", "cg24018148", "cg05730108",
    "cg22508145", "cg19393008", "cg27481428", "cg21589417", "cg16112880",
    "cg10415021", "cg16963093", "cg08250118", "cg13564529", "cg02819655",
    "cg02401352", "cg10117599", "cg16702660", "cg26217827", "cg03796003",
    "cg25977769", "cg17040924", "cg25203245", "cg24844518", "cg11716267",
    "cg21046080", "cg08238319", "cg15988569", "cg18816122", "cg14179288",
    "cg06971224", "cg27230769", "cg09533869", "cg11738485", "cg08570077",
    "cg18285337", "cg06520095", "cg22851875", "cg02877261", "cg00033213",
    "cg09120722", "cg24643105", "cg24694833", "cg02299007", "cg12657416",
    "cg02507579", "cg27056740", "cg23159970", "cg13275129", "cg06052372",
    "cg23727079", "cg27079096", "cg23489384", "cg18673341", "cg04888234",
    "cg04814784", "cg22274273", "cg18572898", "cg01016092", "cg26069044",
    "cg22505202", "cg03075889", "cg09247979", "cg16792234", "cg21294301",
    "cg07703391", "cg10482512", "cg22274196", "cg06264882", "cg13815695",
    "cg25347356", "cg19791271", "cg07474670", "cg26278987", "cg01876809",
    "cg07216619", "cg03706056", "cg00443543", "cg13989295", "cg20775316",
    "cg00631877", "cg15600437", "cg08253809", "cg19707653", "cg05656210",
    "cg19021236", "cg05813498", "cg20089799", "cg19518539", "cg07741840",
    "cg05340866", "cg15075357", "cg11290949", "cg26642774", "cg07456585",
    "cg05393861", "cg26422465", "cg04683516", "cg02907150", "cg08049519",
    "cg11229715", "cg03639185", "cg07056794", "cg16655343", "cg17095460",
    "cg21927991", "cg00696044", "cg00123214", "cg05416337", "cg12568536",
    "cg20536971", "cg16814680", "cg12908908", "cg11008123", "cg15574437",
    "cg21242448", "cg04520693", "cg11164659", "cg12087615", "cg04610028",
    "cg07501029", "cg27467876", "cg15132295", "cg25879395", "cg17196155",
    "cg19577958", "cg07792871", "cg17393140", "cg12515659", "cg10140678",
    "cg15360451", "cg19285525", "cg01521131", "cg24136292", "cg16748433",
    "cg04156077", "cg08657228", "cg23052585", "cg05126514", "cg08603678",
    "cg21028319", "cg16995742", "cg24309769", "cg02772880", "cg10155537",
    "cg06093861", "cg20756026", "cg16120147", "cg07133434", "cg14192979",
    "cg07128503", "cg19539986", "cg18088486", "cg22337626", "cg12036633",
    "cg08880261", "cg08002427", "cg25134647", "cg01236565", "cg02479782",
    "cg07227024", "cg10818676", "cg14241748", "cg00366603", "cg22953510",
    "cg01491428", "cg09307883", "cg15567368", "cg21149357", "cg03754882",
    "cg15083522", "cg13612055", "cg08101922", "cg06711418", "cg23836570",
    "cg20485607", "cg02945674", "cg17341969", "cg01463139", "cg25601713",
    "cg04998327", "cg09182085", "cg02814135", "cg09993319", "cg03221390",
    "cg19787013", "cg04467639", "cg13215060", "cg20040891", "cg14361804",
    "cg16046605", "cg14651435", "cg16435686", "cg19178509", "cg03329597",
    "cg10942914", "cg01188578", "cg17876294", "cg21498547", "cg02316445",
    "cg08373250", "cg06002687", "cg17628491", "cg21575308", "cg24926791",
    "cg23681001", "cg26757229", "cg02823329", "cg05407200", "cg08950364",
    "cg21130926", "cg14195178", "cg00295418", "cg12386614", "cg00424152"
  )
  probe_ids
}

# Fixed list of "rs" SNP identifiers.
#
# Takes no arguments and returns a character vector of 65 rs-prefixed
# identifiers. genall() uses it, together with goodSNP(), as its default set
# of loci to genotype.
anSNP <- function() {
  rs_ids <- c(
    "rs10033147", "rs1019916", "rs1040870", "rs10457834", "rs10774834",
    "rs10796216", "rs10846239", "rs10882854", "rs10936224", "rs11034952",
    "rs11249206", "rs13369115", "rs133860", "rs1414097", "rs1416770",
    "rs1467387", "rs1484127", "rs1495031", "rs1510189", "rs1510480",
    "rs1520670", "rs1941955", "rs1945975", "rs2032088", "rs2125573",
    "rs213028", "rs2208123", "rs2235751", "rs2385226", "rs2468330",
    "rs2521373", "rs264581", "rs2804694", "rs2857639", "rs2959823",
    "rs348937", "rs3818562", "rs3936238", "rs4331560", "rs472920",
    "rs4742386", "rs5926356", "rs5931272", "rs5936512", "rs5987737",
    "rs6426327", "rs6471533", "rs654498", "rs6546473", "rs6626309",
    "rs6982811", "rs6991394", "rs715359", "rs739259", "rs7660805",
    "rs7746156", "rs798149", "rs845016", "rs877309", "rs9292570",
    "rs9363764", "rs939290", "rs951295", "rs966367", "rs9839873"
  )
  rs_ids
}

# Genotype everything available from an object with a betas() method.
#
# Arguments:
#   object  any object for which betas() and featureNames() are defined.
#   locs    character vector of candidate loci; defaults to the combined
#           anSNP() and goodSNP() lists. Loci that are not row names of the
#           beta matrix are silently dropped.
#
# Value:
#   The result of genus() on the rows of the beta matrix named in locs.
genall <- function(object, locs = c(anSNP(), goodSNP())) {
  # Named beta_mat rather than betas so the local value does not shadow the
  # betas() accessor it was obtained from.
  beta_mat <- betas(object)

  # If every one of the first 10 feature names contains "_", the betas are
  # passed through epicv2clean() first. Objects with fewer than 10 features
  # never satisfy this test.
  # NOTE(review): presumably this detects EPIC v2 style probe names and
  # epicv2clean() normalises them; confirm against its definition.
  leading_names <- head(featureNames(object), n = 10L)
  if (length(grep("_", leading_names)) == 10) {
    beta_mat <- epicv2clean(beta_mat)
  }

  locs <- locs[locs %in% rownames(beta_mat)]
  # drop = FALSE keeps a two-dimensional object when only one locus matches or
  # there is a single sample; without it the subset collapses to a vector and
  # genus() cannot iterate over rows.
  genus(beta_mat[locs, , drop = FALSE])
}
# schalkwyk/wateRmelon documentation built on April 15, 2024, 12:06 p.m.