# R/matrix_kn.R In vinny-paris/the_knockoff_of_a_knockoff: A Knockoff of A Knockoff

#### Documented in matrix_kn

#' A slightly modified version of the knockoff function
#'
#'
#' 2 major differences being that the knockoff variables
#' are to be passed directly to the function, not made inside
#' of it; and secondly you can pass the amount of cores wanted
#' directly to the statistic function chosen. Be careful as this
#' may need to be 'NULL'-ed if the statistic function you choose
#' does not allow for multi-cores.
#'
#' @export
#' @param resp The response variable, a matrix the same length as expl The first col must be the identifier
#' @param expl The data frame to be the explanatory variables, the first col must be the identifier
#' @param Xko The knockoff variables created seperately (see the 'knockoff' package for more details). Also must be the same length as expl, resp.
#' @param offset The offset is used to control conservative-ness. 0 for liberal and best for smaller data sets, 1 for conservative and larger data sets. 1 can be a very poor choice if you do not have many, many explanatory variables. Passed to the my_kn function.
#' @param fdr The "False Discovery Rate", allowed to be between 0,1 (non-inclusive). Defaults to .2, implying that 1/5 of variables returned will be a type 1 error. This will be passed to the my_kn function.
#' @param cores The cores to be used in the doMC section of the code, passed to the my_kn function.
#' @param ... Various other arguments to be passed to the my_kn function.
#'
#' @return This will return of data frame 5 columns.
#'\itemize{
#'\item{Resp: } What ever response variable was being tested
#'\item{Expl: } Gives the name of the signficant explanatory variable chosen
#'\item{Est: } Estimate of the glm model, defaults to normal
#'\item{se: } Gives the standard error of the parameter estimate from the glm
#'\item{p-value: } Gives the p-value associated with that variable estimate from the glm for the null hypothesis the effect magnitude is 0.
#'}
#'
#'@note Cores will be rounded to be a natural number

matrix_kn <- function(resp, expl, Xko, offset = 0, fdr = .2, cores = 2, ...){

#error messages
if(!all.equal(dim(expl),dim(Xko))) {stop("Xkl and expl are of differing size, please correct!")}
if(dim(resp)[1] != dim(expl)[1]) {stop("Response variable and explanatory variable have differing row sizes, please correct!")}
if(offset != 1 & offset != 0) {stop("Offset needs to be 1 or 0, please correct!")}
if(fdr < 0 | fdr > 1) {stop("False Discovery Rate (fdr) must be between 0, 1 non-inclusive")}
if(sum(resp[,1] %in% expl[,1]) == 0) {stop("Improper identifiers, no matches found. Please correct!")}

#rounding of cores
cores <- round(cores)

#relabel the identifiers
colnames(resp)[1] <- "ID"
colnames(expl)[1] <- "ID"

#create storage matrix for results
res.cache <- data.table(
phenotype=rep("", res.cache.nrow),
snp="", est=0.0, se=0.0, p_value=0.0)
res.cache.idx <- 0

#get name sof explanatory variables
expl_names <- setdiff(names(expl), "ID")

for (phename in colnames(resp)[-1]){

#reduce the response matrix to ID and a single response variable
diti <- data.frame(resp[,1], resp[,phename])
colnames(diti) <- c("ID", phename)

#correct for missing identifiers
diti <- diti[diti$ID %in% expl$ID,]

#Find values in matrix such that we have resps to
non_missing <- !is.na(diti[,phename])

#Collect rows based on above non-missing resp rows
doto <- diti[non_missing, -1]
kn_var <- Xko[non_missing,]
covar <- expl[non_missing, -1]

#Do the anlaysis and collect the results
kn <- my_kn((covar), doto, kn_var, offset = 0, fdr = .9)
chosen <- kn$selected chosen #Return NA's or run the glm over chosen variables depending on #which variables (if any) were chosen by my_kn holding <- NULL if(length(chosen) == 0) { result <- t(rep(NA, 4)) } else{ result <- NULL stuff <- matrix(unlist(data[,names(chosen)]), nrow = dim(data)[1]) colnames(stuff) <- names(chosen) model <- glm( doto ~ as.matrix(expl[,chosen]), family = gaussian(), data = covar, na.action=na.exclude) result <- as.data.frame(summary(model)$coefficients)[-1,]
}

expl_names <- names(chosen)
if(length(chosen) == 0) {expl_names <- "No Sig. Expl. Var."}

#Combine results into a single data frame listing resp, expl var., est, se, p-value
result <- as.matrix(result, ncol = 4)
for(i in 1:(dim(result)[1])){
res.cache.idx <- res.cache.idx + 1
res.cache[res.cache.idx, ] <- list(resp = phename,
expl = expl_names[i],
est = result[i,1],
se = result[i,2],
p_value = result[i,4])
}
}
return(res.cache)
}


vinny-paris/the_knockoff_of_a_knockoff documentation built on May 26, 2019, 12:32 p.m.