# R/mdr.r
#
# Defines functions: compare, mdr, mdr.hr, mdr.cv, mdr.3WS, mdr.ca.adj,
#   boot.error, permute.mdr, summary.mdr, predict.mdr, plot.mdr

############ Code for the MDR functions (cross-validation and three-way split validation)

require(lattice)  #plot.mdr uses lattice::barchart

compare<-function(mat,vec,k) {  #internal use only: counts the rows of mat whose first k+1 entries match the target vector vec
b<-1
match<- 1:dim(mat)[1]
while (b <= (k+1)) {
match<- match[mat[match,b]==as.numeric(vec[b])]
b<-b+1
} #close while loop
return(length(match))
}


###### function to use MDR, calculate balanced accuracy, save top x models

mdr<-function(split,comb,x,ratio,equal="HR",genotype=c(0,1,2)) { #split is the data split, comb is the matrix of combinations to consider (row=combination, col=loci)
          # x is how many models to save, ratio is the threshold each cell is compared against, equal determines how to classify cells where cases=controls ("HR" or "LR")
          # includes balanced accuracy and handles missing data (where missing is encoded as NA)
if (is.vector(comb)) comb<-as.matrix(comb)  #make sure comb is read as a matrix
    x.new<-ifelse(x>dim(comb)[1], dim(comb)[1], x) #if x is bigger than the number of combinations, reduce to the number of combinations
    N<-dim(split)[1]
    n<-dim(split)[2]
    n.comb<-dim(comb)[1]
    k<-dim(comb)[2]  

    results <- matrix(0, n.comb, k+1)  #matrix to store results (classification)

    results[, 1:k] <- comb  #first k columns (the model size) hold all candidate combinations
    g<-length(genotype)

    geno <- list(genotype) # count three genotypes
    case <- cbind(rep(1, g^k), expand.grid(rep(geno, k))) #creates design matrix with all combinations (contingency for cases)
    ctrl <- cbind(rep(0, g^k), expand.grid(rep(geno, k))) #this acts as contingency table for controls
    counts <- matrix(0, dim(case)[1], 3)  #store number of cases, controls, ratio for all combinations

       snps <- split[, 2:n]                               #snps for training 
       hr.lr<-matrix(0,n.comb,dim(case)[1])   # matrix to store hr/lr combination (1=high, 0=low)

        for (i in 1:n.comb) {  #loop over all genotype combinations

            if (is.vector(comb)) {model<-comb[i]} else {model<-comb[i,]}

part<-split[, c(1, model + 1)]
miss<-is.na(part) #which entries are missing?
miss2<-apply(miss,1,sum) #which rows have missings? (sums > 0?)
if (sum(miss2)>0  ) {part<-part[-which(miss2>0),]} #eliminate the rows which have missings (only if there is missing data)
  
            counts[,1]<-apply(case,1,compare,mat=part,k=k)   #which rows match (cases with given combination)?
	                               # gives number of cases for each genotype combination 
  
            counts[,2]<-apply(ctrl,1,compare,mat=part,k=k)  #which rows match (controls with given combination)?
	                                # gives number of controls for each genotype combination

            counts[, 3] <- counts[, 1]/counts[, 2]   #observed ratio
	if (equal=="HR") {hr.lr[i,]<-(counts[, 3] >= ratio)} else { if (equal=="LR") { 
			hr.lr[i,]<-(counts[, 3] > ratio)} else {print("Improper value for argument 'equal' ")}}  #define which combinations are hr and lr 
		tp<- sum(counts[which(hr.lr[i,]==1), 1]) #true positives
		fp<- sum(counts[which(hr.lr[i,]==1), 2]) #false positives
		tn<- sum(counts[which(hr.lr[i,]==0), 2]) #true negatives
		fn<- sum(counts[which(hr.lr[i,]==0), 1]) #false negatives

            results[i, k+1] <- 100*( tp/(tp+fn) + tn/(tn+fp) ) / 2   #balanced accuracy (%): average of sensitivity and specificity
        }
    sorted<-order(results[,k+1], decreasing=TRUE) #want maximum
    best.models<-results[sorted[1:x.new],]
    best.hrlr<-hr.lr[sorted[1:x.new],]
    if (x.new>1) {models<-best.models[,1:k]} else {models<-best.models[1:k]}
    if (x.new>1) {acc<-best.models[,k+1]} else {acc<-best.models[k+1]}

    return(list("models"=models, "balanced accuracy"=acc, "high risk/low risk"=best.hrlr)  )

}   #end mdr function
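
## Illustrative usage sketch (not part of the package source; object names such as `sim`
## and `top3` are hypothetical, and the block is wrapped in if (FALSE) so nothing runs
## when this file is sourced): score all two-locus models on simulated case/control data.
if (FALSE) {
  set.seed(1)
  sim <- data.frame(status = rbinom(200, 1, 0.5),                         #column 1: case (1) / control (0)
                    matrix(sample(0:2, 200 * 5, replace = TRUE), 200, 5)) #columns 2-6: SNPs coded 0/1/2
  two.locus <- t(combn(5, 2))                     #all 10 two-locus combinations of the 5 SNPs
  top3 <- mdr(sim, two.locus, x = 3, ratio = 1)   #keep the 3 models with the highest balanced accuracy
  top3$models
  top3$"balanced accuracy"
}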

mdr.hr<-function(split,model,hr,genotype=c(0,1,2)) { #split is the data split, model is the combination of loci to evaluate (a vector of SNP indices),
          # hr is an indicator vector marking which genotype combinations are high risk
    N<-dim(split)[1]
    n<-dim(split)[2]
    k<-length(model)
    g<-length(genotype)

    geno <- list(genotype) # count three genotypes
    case <- cbind(rep(1, g^k), expand.grid(rep(geno, k))) #creates design matrix with all combinations (contingency for cases)
    ctrl <- cbind(rep(0, g^k), expand.grid(rep(geno, k))) #this acts as contingency table for controls
    counts <- matrix(0, g^k, 2)  #store number of cases, controls

       snps <- split[, 2:n]                               #snps for training 

part<-split[, c(1, model + 1)]
miss<-is.na(part) #which entries are missing?
miss2<-apply(miss,1,sum) #which rows have missings? (sums > 0?)
if (sum(miss2)>0  ) {part<-part[-which(miss2>0),]} #eliminate the rows which have missings (only if there is missing data)
  
            counts[,1]<-apply(case,1,compare,mat=part,k=k)   #which rows match (cases with given combination)?
	                               # gives number of cases for each genotype combination 
  
            counts[,2]<-apply(ctrl,1,compare,mat=part,k=k)  #which rows match (controls with given combination)?
	                                # gives number of controls for each genotype combination
		
		tp<- sum(counts[which(hr==1), 1]) #true positives
		fp<- sum(counts[which(hr==1), 2]) #false positives
		tn<- sum(counts[which(hr==0), 2]) #true negatives
		fn<- sum(counts[which(hr==0), 1]) #false negatives

            results<- 100*( tp/(tp+fn) + tn/(tn+fp) ) / 2      								      
    return(list("balanced accuracy"=results)  )
}   #end mdr.hr function
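
## Illustrative sketch (hypothetical objects, wrapped in if (FALSE)): re-score a fixed
## one-locus model whose heterozygous cell (genotype 1) is declared high risk; hr needs
## one entry per genotype combination, in the expand.grid() order used above.
if (FALSE) {
  set.seed(2)
  sim <- data.frame(status = rbinom(200, 1, 0.5),
                    matrix(sample(0:2, 200 * 5, replace = TRUE), 200, 5))
  mdr.hr(sim, model = 1, hr = c(0, 1, 0))   #balanced accuracy of "SNP1 heterozygote = high risk"
}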


mdr.cv<-function (data, K, cv, ratio = NULL, equal="HR",genotype=c(0,1,2)) #K is highest level of interaction to consider, cv is number of cv intervals
{
    if (is.null(ratio)) 
	 ratio <- length(which(data[,1] == 1))/length(which(data[,1] == 0))   #set threshold to be ratio of cases to controls in whole data      
    N <- dim(data)[1]   #number of individuals
    p <- dim(data)[2]   # number of snps + 1
    n <- N %/% cv  # number of individuals per split (minus remainder)
    r<- N-n*cv   #remainder
 
        data <- data[sample(1:N), ]  #randomly order the data for the cv split

###### store results
models<-vector("list",K)
acc<-matrix(0,K,3) #save ca, pa, cvc

######## Perform MDR for 1 to K levels of interaction (model building)
for (i in 1:K) {

best.mod<-matrix(0,cv,i)
ca<-rep(0,cv)
pa<-rep(0,cv)

######### start cross validation  
for (k in 1:cv) {



#need to split the data
if (k==cv) { build<- data[-((1+(k-1)*n):(k*n+r)),] } else { build<- data[-((1+(k-1)*n):(k*n)),] } #make sure the remainder individuals fall in a prediction interval
	geno <- combn(p - 1, i)  #list of all possible combinations
        res<-mdr(build,t(geno),1,ratio,equal,genotype) #pick only the best model
        best.mod[k,]<-res$models
        ca[k]<-res$bal    #put best models and accuracies into the list to store (by cv interval, then model size)
if (k==cv) {predict<- data[(1+(k-1)*n):(k*n+r),]} else { predict<- data[(1+(k-1)*n):(k*n),] }

        res<-mdr.hr(predict,t(best.mod[k,]),res$high,genotype)  #evaluate the selected model on the held-out interval
        pa[k]<-res$bal  #store the prediction (balanced) accuracy for this interval

} #close cv loop

#get avg ca, pa, and cvc for this level and pick best model
unique.results<-as.matrix(unique(best.mod[,1:i]))
cvc<-rep(0,nrow(unique.results))

for (j in 1:length(cvc)) {   #count up cross-val consistency
for (k in 1:cv) {
if (sum(best.mod[k,]==unique.results[j,])==i) {
    cvc[j]<-cvc[j]+1 }
} }

if (i>1) { best<-matrix(unique.results[which(cvc==max(cvc)),], ncol=i) } else { best<-as.matrix(unique.results[which(cvc==max(cvc)),]) } #make sure we have correct dimensions
  #ties in CVC are broken below by average classification accuracy

if (nrow(best)==1) {  sum.ca <- 0
  for(k in 1:cv){  #loop across cv intervals
    
     if (sum(best.mod[k,]==best)==i) {    
      sum.ca <- sum.ca + ca[k]  #accumulate classification accuracy
    } }  #close if and for loop
  
avg.ca <- sum.ca/max(cvc) } else { b<-dim(best)[1]

sum.ca <- rep(0,b)
  
for(k in 1:cv){  #loop across cv intervals
for(l in 1:b) { #loop over high cvc models    
     if (sum(best.mod[k,]==best[l,])==i) {    
      sum.ca[l] <- sum.ca[l] + ca[k]  #accumulate classification accuracy
    } }  #close if and for loop
}
avg.ca <- sum.ca/max(cvc) 
best<-best[which(avg.ca==max(avg.ca)),]
avg.ca<-max(avg.ca)
}

models[[i]]<-best  #choose the model with maximum CVC and maximum classification accuracy

  ### best.model.PA   #same idea, find average prediction accuracy of best model
    sum.pa <- 0
  for(k in 1:cv){  #loop across cv intervals
    if (sum(best.mod[k,]==best)==i) {    
      sum.pa <- sum.pa + pa[k]  #accumulate prediction accuracy
    }
  }

  avg.pa <- sum.pa/max(cvc)  #average prediction accuracy

acc[i,1]<-avg.ca
acc[i,2]<-avg.pa
acc[i,3]<-max(cvc)
colnames(acc)<-list("classification accuracy", "prediction accuracy", "cross-validation consistency")

} #close K loop


final.acc<-which(acc[,2]==max(acc[,2]))	#final model maximizes prediction accuracy across levels
final.cvc<-which(acc[,3]==max(acc[,3]))	#final model maximizes cross-validation consistency across levels

if (length(final.cvc)>1 && any(final.cvc %in% final.acc)) {
final.cvc<-final.cvc[final.cvc %in% final.acc] } #if several levels tie on CVC, keep those that also maximize prediction accuracy

final<-min(c(final.acc, final.cvc))  #if the two criteria disagree, pick the smaller index (parsimony)

final.model<-models[[final]]
hrlr<-mdr(data,matrix(final.model,nrow=1),1,ratio,equal,genotype) #get hr/lr of the final model (forced to a single-row model matrix; same equal/genotype settings)

obj<-list("final model"=final.model, "final model accuracy"= acc[final,2], "top models"=models, "top model accuracies"=acc, "high-risk/low-risk"=hrlr$high, "genotypes"=genotype, "validation method"="CV")
class(obj)<-"mdr" #define mdr class
return(obj)

}  #end cv function
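
## Illustrative sketch (hypothetical objects, wrapped in if (FALSE)): 5-fold
## cross-validation over 1- and 2-locus models; the returned object has class "mdr",
## so summary(), predict(), and plot() dispatch to the methods defined below.
if (FALSE) {
  set.seed(3)
  sim <- data.frame(status = rbinom(200, 1, 0.5),
                    matrix(sample(0:2, 200 * 5, replace = TRUE), 200, 5))
  fit.cv <- mdr.cv(sim, K = 2, cv = 5)   #ratio defaults to the observed case:control ratio
  fit.cv$"final model"
  fit.cv$"final model accuracy"
}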


mdr.3WS<-function (data, K, x=NULL, proportion=NULL, ratio = NULL, equal="HR",genotype=c(0,1,2)) #x is the number of top training models passed on to the testing set, K is the highest level of interaction to consider, proportion is the ratio training:testing:validation (default 2:2:1)
{
    if (is.null(ratio)) 
        ratio <- length(which(data[,1] == 1))/length(which(data[,1] == 0))   #set threshold to be ratio of cases to controls in whole data
    N <- dim(data)[1]   #number of individuals
    p <- dim(data)[2]   # number of snps + 1
    if (is.null(proportion)) 
        proportion <- c(2,2,1)   #set default proportions to 2:2:1
    if (is.null(x))
        x<-p-1 #default is the number of snps
    data <- data[sample(1:N), ] #randomly order the data for the three-way split
    prop<- sum(proportion) #number of proportions for the split
    n <- N%/%prop  # number of individuals per proportion (minus remainder)
#need to split the data
   test<-data[1:(proportion[2]*n),] #second split
   val<-data[(proportion[2]*n+1):((proportion[2]+proportion[3])*n),] #third split
   train<-data[((proportion[2]+proportion[3])*n+1):N,]  #first split; want training set to be largest so add remainder

################## Store results
models<-vector("list",K) #store final model for each level of K
accuracy<-matrix(0,K,3) #save training, testing, and prediction accuracy for each level of K

######## Perform MDR on the first split for 1 to K levels of interaction (training)

results1<-vector("list",K)  #create matrices to store results for each level

for (i in 1:K) {
	geno <- combn(p - 1, i)  #list of all possible combinations
        x.new<-ifelse(x>dim(geno)[2], dim(geno)[2], x) #if x is bigger than the number of combinations, reduce to the number of combinations
        res<-mdr(train,t(geno),x.new,ratio,equal,genotype)
        results1[[i]]<-res }  #put best models into the list to store

######## Perform MDR for second split for 1-nbr levels of interaction (testing)

results2<-vector("list",K)  #create matrices to store results for each level

for (i in 1:K) {
	comb<-results1[[i]]$models  #extract models of interest
        res<-mdr(test,comb,1,ratio,equal,genotype) #only save the best model for each level
        results2[[i]]<-res }  #put best models into the list to store

####### Perform MDR for third split and return model + prediction error
results3<-vector("list",K)  #create matrices to store results for each level
acc<-NULL
for (i in 1:K) {
	comb<-t(as.matrix(results2[[i]]$models))  #extract models of interest; make sure it reads it as matrix and not vector
        res<-mdr(val,comb,1,ratio,equal,genotype) #only save the best model for each level
        acc<-c(acc,res$bal)
        results3[[i]]<-res }

final<-which(acc==max(acc))	#final model maximizes accuracy across levels (minimizes prediction error)
final<-min(final) #choose model of smallest size (parsimony) if more than one model has the minimum prediction error

final.model<-results3[[final]]$models
BA<-results3[[final]]$bal
hrlr<-mdr(data,t(final.model),1,ratio,equal,genotype) #get hr/lr of the final model (same equal/genotype settings)

###get final model for each level K and accuracy
for (i in 1:K) {
models[[i]]<-results3[[i]]$models
index<-j<-0
while (index<1) {
j<-j+1
if (sum(as.matrix(results1[[i]]$models)[j,]==results3[[i]]$models)==i) index<-j
}

accuracy[i,1]<-results1[[i]]$'balanced accuracy'[index] #training accuracy
accuracy[i,2]<-results2[[i]]$'balanced accuracy' #testing accuracy
accuracy[i,3]<-results3[[i]]$'balanced accuracy' #prediction accuracy
}

colnames(accuracy)<-list("training accuracy", "testing accuracy", "prediction accuracy")
obj<-list("final model"=final.model, "final model accuracy"=BA, "top models"=models, "top model accuracies"=accuracy, "high-risk/low-risk"=hrlr$high, "genotypes"=genotype, "validation method"="3WS")
class(obj)<-"mdr" #define mdr class
return(obj)

}  #end 3way function
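
## Illustrative sketch (hypothetical objects, wrapped in if (FALSE)): three-way split
## validation with the default 2:2:1 train/test/validation proportions, carrying only
## the 3 best training models forward to the testing split.
if (FALSE) {
  set.seed(4)
  sim <- data.frame(status = rbinom(300, 1, 0.5),
                    matrix(sample(0:2, 300 * 5, replace = TRUE), 300, 5))
  fit.3ws <- mdr.3WS(sim, K = 2, x = 3)
  fit.3ws$"final model"
  fit.3ws$"top model accuracies"
}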

###############################
# post-hoc functions
###############################

mdr.ca.adj<-function(data,model,hr,prev,genotype=c(0,1,2)) { #model is the combination of loci to consider (a vector of SNP indices),
          # hr is an indicator vector marking which genotype combinations are high risk, prev is the prevalence estimate
    N<-dim(data)[1]
    n<-dim(data)[2]
    k<-length(model)
    g<-length(genotype)

p.case <- length(which(data[,1] == 1))/ N   #estimated probability of being a case in the sample

    geno <- list(genotype) # count three genotypes
    case <- cbind(rep(1, g^k), expand.grid(rep(geno, k))) #creates design matrix with all combinations (contingency for cases)
    ctrl <- cbind(rep(0, g^k), expand.grid(rep(geno, k))) #this acts as contingency table for controls
    counts <- matrix(0, g^k, 2)  #store number of cases, controls

       snps <- data[, 2:n]                               #snps for training 

part<-data[, c(1, model + 1)]
miss<-is.na(part) #which entries are missing?
miss2<-apply(miss,1,sum) #which rows have missings? (sums > 0?)
if (sum(miss2)>0  ) {part<-part[-which(miss2>0),]} #eliminate the rows which have missings (only if there is missing data)
  
            counts[,1]<-apply(case,1,compare,mat=part,k=k)   #which rows match (cases with given combination)?
	                               # gives number of cases for each genotype combination 
  
            counts[,2]<-apply(ctrl,1,compare,mat=part,k=k)  #which rows match (controls with given combination)?
	                                # gives number of controls for each genotype combination
		
		tp<- sum(counts[which(hr==1), 1]) #true positives
		fp<- sum(counts[which(hr==1), 2]) #false positives
		tn<- sum(counts[which(hr==0), 2]) #true negatives
		fn<- sum(counts[which(hr==0), 1]) #false negatives

            results<- 100-(100/N)*( ((1-prev)/(1-p.case))*fp + (prev/p.case)*fn )      								      
    return(list("adjusted classification accuracy"=results, "adjusted classification error"=100-results)  )
}   #end mdr.ca.adj function; this does not implement CV
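
## Illustrative sketch (hypothetical objects, wrapped in if (FALSE)): adjust the apparent
## classification accuracy of a fitted model for an assumed population prevalence of 10%,
## reusing the fitted high-risk/low-risk labels.
if (FALSE) {
  set.seed(5)
  sim <- data.frame(status = rbinom(200, 1, 0.5),
                    matrix(sample(0:2, 200 * 5, replace = TRUE), 200, 5))
  fit.cv <- mdr.cv(sim, K = 2, cv = 5)
  mdr.ca.adj(sim, model = as.vector(fit.cv$"final model"),
             hr = fit.cv$"high-risk/low-risk", prev = 0.10)
}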

boot.error<-function(data, prev, model, hr, b, genotype=c(0,1,2)) {# inputs are the case/control dataset, a prevalence estimate, the final mdr model loci (as a vector), the high-risk/low-risk indicator, and b, the number of bootstrap samples

data<-data[, c(1, model + 1)] #reduce data to only the model of interest
miss<-is.na(data) #which entries are missing?
miss2<-apply(miss,1,sum) #which rows have missings? (sums > 0?)
if (sum(miss2)>0  ) {data<-data[-which(miss2>0),]} #eliminate the rows which have missings (only if there is missing data)

n<-dim(data)[1]

cases<-data[which(data[,1]==1),]
ctrls<-data[which(data[,1]==0),]
n.case<- as.integer(round(prev*n,0))
n.ctrl<- n-n.case
boot.error<-rep(0,b) #vector to save error estimates for each bootstrap sample

for (i in 1:b) { #loop across bootstrap samples
index.cases<-sample(seq(1:(dim(cases)[1])),n.case,replace=TRUE)
index.ctrls<-sample(seq(1:(dim(ctrls)[1])),n.ctrl,replace=TRUE)
boot.cases<-cases[index.cases,]  #this is bootstrap resample
boot.ctrls<-ctrls[index.ctrls,]

k<-length(model)
g<-length(genotype)

geno <- list(genotype) # define possible genotypes
x.case <- cbind(rep(1, g^k), expand.grid(rep(geno, k))) #creates design matrix with all combinations (contingency for cases)
x.ctrl <- cbind(rep(0, g^k), expand.grid(rep(geno, k))) #this acts as contingency table for controls
n.comb <- dim(x.case)[1]  # number of genotype combinations

counts<-matrix(0,n.comb,3) #store number of cases, ctrls, and sample prev for each combination
snps.case<-boot.cases[,-1]
snps.ctrl<-boot.ctrls[,-1]

counts[, 1] <- apply(x.case, 1, compare,mat=boot.cases,k=k)  # gives number of cases for each genotype combination 
counts[, 2] <- apply(x.ctrl, 1, compare,mat=boot.ctrls,k=k)  # gives number of controls for each genotype combination

		tp<- sum(counts[which(hr==1), 1]) #true positives
		fp<- sum(counts[which(hr==1), 2]) #false positives
		tn<- sum(counts[which(hr==0), 2]) #true negatives
		fn<- sum(counts[which(hr==0), 1]) #false negatives

error <- 100 * (fp + fn)/n
boot.error[i]<-error  } #end loop across bootstrap samples

est<-mean(boot.error) #estimate is the mean across b samples
return(list("classification error estimate"=est, "classification accuracy estimate"=100-est))
} #end function
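
## Illustrative sketch (hypothetical objects, wrapped in if (FALSE); b is kept small only
## to make the example quick): bootstrap the classification error of a fitted model,
## resampling cases and controls so each resample reflects an assumed 10% prevalence.
if (FALSE) {
  set.seed(6)
  sim <- data.frame(status = rbinom(200, 1, 0.5),
                    matrix(sample(0:2, 200 * 5, replace = TRUE), 200, 5))
  fit.cv <- mdr.cv(sim, K = 2, cv = 5)
  boot.error(sim, prev = 0.10, model = as.vector(fit.cv$"final model"),
             hr = fit.cv$"high-risk/low-risk", b = 50)
}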

permute.mdr<-function(accuracy, loci, N.permute, method = c("CV", "3WS","none"), 
    data, cv, K, x = NULL, proportion = NULL, ratio = NULL, equal = "HR", 
    genotype = c(0, 1, 2), LRT = FALSE) 
{
    method <- match.arg(method)
    if (is.null(ratio)) 
        ratio <- length(which(data[, 1] == 1))/length(which(data[, 
            1] == 0))
    mdr.lrt <- function(data, loci, x, ratio, equal, genotype) {
        X <- matrix(0, dim(data)[1], length(loci))  #one row per individual, one column per locus
        for (i in 1:length(loci)) {
            hr <- mdr(data, loci[i], x, ratio, equal, genotype)
            hr <- hr$"high risk/low risk"
            f <- as.factor(data[, (loci[i] + 1)])
            s <- nlevels(f)
            d <- diag(s)[f, ]
            X[, i] <- d %*% hr
        }
        form1 <- paste(paste("X[,", 1:length(loci), "]", sep = ""), 
            collapse = "*")
        com <- combn(1:length(loci), length(loci) - 1)
        term <- NULL
        for (j in 1:dim(com)[2]) {
            term <- c(term, paste(paste("X[,", com[, j], "]", 
                sep = ""), collapse = "*"))
        }
        form2 <- paste(term, collapse = "+")
        lrfit.full <- glm(as.formula(paste("data[,1]~", form1, 
            sep = "")), family = binomial(link = logit))
        lrfit.reduced <- glm(as.formula(paste("data[,1]~", form2, 
            sep = "")), family = binomial(link = logit))
        x2 <- lrfit.reduced$deviance - lrfit.full$deviance
        return(x2)
    }
    if (LRT == TRUE) {
        x2 <- mdr.lrt(data = data, loci = loci, x = 1, ratio = ratio, 
            equal = equal, genotype = genotype)
        chi <- rep(0, N.permute)
    }
    acc <- rep(0, N.permute)
    if (method == "CV") {
        for (i in 1:N.permute) {
            permute <- sample(data[, 1], length(data[, 1]), replace = FALSE)
            new <- cbind(permute, data[, -1])
            fit <- mdr.cv(data = new, K, cv, ratio, equal, genotype)
            acc[i] <- fit$"final model accuracy"
            if (LRT == TRUE) {
                chi[i] <- mdr.lrt(data = new, loci = fit$"final model", 
                  x = 1, ratio = ratio, equal = equal, genotype = genotype)
            }
        }
    }
    if (method == "3WS") {
        for (i in 1:N.permute) {
            permute <- sample(data[, 1], length(data[, 1]), replace = FALSE)
            new <- cbind(permute, data[, -1])
            fit <- mdr.3WS(data = new, K, x, proportion, ratio, 
                equal, genotype)
            acc[i] <- fit$"final model accuracy"
            if (LRT == TRUE) {
                chi[i] <- mdr.lrt(data = new, loci = fit$"final model", 
                  x = 1, ratio = ratio, equal = equal, genotype = genotype)
            }
        }
    }
    if (method == "none") {
        for (i in 1:N.permute) {
            permute <- sample(data[, 1], length(data[, 1]), replace = FALSE)
            new <- cbind(permute, data[, -1])
            fit <- mdr(split = new, comb=loci, x=1, ratio=ratio,equal=equal,genotype=genotype)
            acc[i] <- fit$"balanced accuracy"
            if (LRT == TRUE) {
                chi[i] <- mdr.lrt(data = new, loci = fit$"models", 
                  x = 1, ratio = ratio, equal = equal, genotype = genotype)
            }
        }
    }	
    acc <- sort(acc)
    pval <- sum(acc > accuracy)/N.permute
    result <- list(`Permutation P-value` = pval, `Permutation Distribution` = acc)
    if (LRT == TRUE) {
        chi <- sort(chi)
        lrt.p <- sum(chi > x2)/N.permute
        result <- list(`Permutation P-value` = pval, `Permutation Distribution` = acc, 
            `LRT P-value` = lrt.p, `LRT Distribution` = chi)
    }
    return(result)
}
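
## Illustrative sketch (hypothetical objects, wrapped in if (FALSE); N.permute is far too
## small for a real test and only keeps the example fast): permutation p-value for the
## accuracy of one fixed two-locus model, with no internal validation (method = "none").
## loci is supplied as a one-row matrix so each permuted fit evaluates the same SNP1*SNP2 model.
if (FALSE) {
  set.seed(7)
  sim <- data.frame(status = rbinom(200, 1, 0.5),
                    matrix(sample(0:2, 200 * 5, replace = TRUE), 200, 5))
  rat <- sum(sim[, 1] == 1) / sum(sim[, 1] == 0)                 #case:control ratio threshold
  obs <- mdr(sim, matrix(c(1, 2), nrow = 1), x = 1, ratio = rat) #observed accuracy of the SNP1*SNP2 model
  permute.mdr(accuracy = obs$"balanced accuracy", loci = matrix(c(1, 2), nrow = 1),
              N.permute = 20, method = "none", data = sim, cv = 5, K = 2)
}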

################### define methods for the 'mdr' class

summary.mdr<-function(object, ...) { #previously fit mdr object

k<-length(object$'top models')  #define sizes of interaction considered
level<-1:k  #define the sequences of levels
best.models<-matrix(NA,k,k) #matrix to store the best models for each level

for (i in 1:k) {
mod<-object $'top models'[[i]]
if (length(mod)==k) {
best.models[i,]<-mod} else {
add<-k-length(mod)
best.models[i,]<-c(mod, rep(NA,add))  #store the best model for level i
}
}

model<-object$'final model'
best<-NULL
for (i in 1:k) {
if (i==length(model)) {best<-c(best,"*") } else {best<-c(best,"") } 
}

if (object$'validation method'=="3WS") {

	train<-round(object $'top model accuracies'[,1], digits=2 )#save training, testing, and validation accuracies
	test<-round(object $'top model accuracies'[,2], digits=2 )
	val<-round(object $'top model accuracies'[,3], digits=2 )

	tab<-cbind(level,best.models,train,test,val)
	rownames(tab)<-best
	colnames(tab)<-c("Level", "   Best Models", paste(rep("", k-1), sep=""), "   Training Accuracy", "   Testing Accuracy", "   Validation Accuracy")
	print(tab, na.print="")
	cat("", "\n")
	cat("'*' indicates overall best model")
	}    else {  if   (object$'validation method'=="CV") { 

		ca<-round(object $'top model accuracies'[,1], digits=2 )#save classification and prediction accuracy and CVC
		pa<-round(object $'top model accuracies'[,2], digits=2 )
		cvc<-round(object $'top model accuracies'[,3], digits=2 )

		tab<-cbind(level,best.models,ca,pa,cvc)
		rownames(tab)<-best
		colnames(tab)<-c("Level", "   Best Models", paste(rep("", k-1), sep=""), "   Classification Accuracy", "   Prediction Accuracy", "   Cross-Validation Consistency")
		print(tab, na.print="")
		cat("", "\n")
		cat("'*' indicates overall best model")
		}  else {  print("invalid mdr object")
		} }#close nested ifs
 } #close function 
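
## Illustrative sketch (hypothetical objects, wrapped in if (FALSE)): the summary method
## prints, for each level of interaction, the best model with its accuracies, and flags
## the overall best model with '*'.
if (FALSE) {
  set.seed(8)
  sim <- data.frame(status = rbinom(200, 1, 0.5),
                    matrix(sample(0:2, 200 * 5, replace = TRUE), 200, 5))
  fit.cv <- mdr.cv(sim, K = 2, cv = 5)
  summary(fit.cv)   #dispatches to summary.mdr for objects of class "mdr"
}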

predict.mdr<-function(object,new.data,...){ #previously fit mdr object, and a new dataset with the same original variables in the same order (minus the response)
model<-object$'final model'
hr<-object$high
k<-length(model)
geno<-list(object$genotypes) #need to define the genotype combinations for the model
comb<-as.matrix(expand.grid(rep(geno, k)))
new.data<-as.matrix(new.data[, model]) #reduce data to only the model of interest
match<-matrix(0,dim(new.data)[1],dim(comb)[1])#define matrix to store genotype combination
for (i in 1:dim(new.data)[1]) {
for (j in 1:dim(comb)[1]) {
match[i,j]<- sum(new.data[i,]==comb[j,])==k  #create indicator for genotype combination j for individual i
}#close loop over combinations
}#close for loop over individuals
predict<-match%*%hr #indicator for genotype combination of individual i is high risk
return(predict)
}#close function
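
## Illustrative sketch (hypothetical objects, wrapped in if (FALSE)): predict high-risk (1)
## versus low-risk (0) status for new individuals; new.data must contain the same SNP
## columns in the same order as the training data, without the response column.
if (FALSE) {
  set.seed(9)
  sim <- data.frame(status = rbinom(200, 1, 0.5),
                    matrix(sample(0:2, 200 * 5, replace = TRUE), 200, 5))
  fit.cv <- mdr.cv(sim, K = 2, cv = 5)
  new.geno <- matrix(sample(0:2, 10 * 5, replace = TRUE), 10, 5)   #10 new individuals, 5 SNPs
  predict(fit.cv, new.geno)   #one 0/1 risk call per new individual
}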

plot.mdr<-function(x,data,main="",xlab="",ylab="Count", table=FALSE,...){ #previously fit MDR object and associated dataset; table arg gives a summary table of the counts

mod<-x$'final model'
geno<-list(as.factor(x$genotypes)) #read as categorical for appropriate labeling
g<-length(x$genotypes)
k<-length(mod)
m<-k%/%2
hr<-x$high

r<- g^m #number of rows for plotting window
c<- g^(k-m) #number of columns for plotting window

case<-data[which(data[,1]==1),mod+1] #split data into cases and controls
ctrl<-data[which(data[,1]==0),mod+1]

tab1<-table(case)  #create contingency table of multifactorial genotypes
tab0<-table(ctrl)

comb<-expand.grid(rep(geno,k))
dat<-c(as.vector(tab1),as.vector(tab0))
dat<-data.frame(dat)
dat<-cbind(dat,c(rep("Case",g^k),rep("Control",g^k)))
comb2<-rbind(comb,comb)
hr2<-c(hr,hr) #repeat the high-risk indicator so it lines up with the stacked case and control counts
dat<-cbind(dat,comb2,hr2)

snps<-NULL
for (i in 1:k) {
snps<-c(snps,paste("SNP",as.character(mod[i]),sep="")) }

colnames(dat)<-c("count","status",snps,"high-risk")
if (table) {print(dat)}

genotypes<-snps[1]
if (k>1) {for (i in 2:k) {
genotypes<-paste(genotypes,snps[i],sep="*") } }

form<-formula(paste("count~status |",genotypes))

panel.color<-function(x,y,...) { #shade the panel gray when cases outnumber controls in that genotype cell
color<-c("black","white")
if(y[1]>y[2])panel.fill(col="gray")
panel.barchart(x,y,col=color,horizontal=FALSE)
} 

barchart(form, data=dat,layout=c(c,r),
        ylab=ylab, xlab=xlab,
        strip=strip.custom(strip.names=c(TRUE,TRUE),strip.levels=c(TRUE,TRUE)),
        panel=panel.color, key=list(text=list(c("High-Risk","Low-Risk"),columns=2,title="Legend"),rectangles=list(col=c("gray","white"),columns=2)), main=main)

} #close function
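
## Illustrative sketch (hypothetical objects, wrapped in if (FALSE)): lattice barchart of
## case/control counts for every genotype combination of the final model; panels drawn on
## a gray background are the cells where cases outnumber controls. Pass a data.frame so
## that the multi-way contingency tables are built correctly.
if (FALSE) {
  set.seed(10)
  sim <- data.frame(status = rbinom(200, 1, 0.5),
                    matrix(sample(0:2, 200 * 5, replace = TRUE), 200, 5))
  fit.cv <- mdr.cv(sim, K = 2, cv = 5)
  plot(fit.cv, sim, main = "Final MDR model", table = TRUE)   #table = TRUE also prints the counts
}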
