R/DRaWR.R

Defines functions DRaWR RWR threeCol2listMat threeCol2MaxMat

Documented in DRaWR RWR threeCol2listMat threeCol2MaxMat

#' threeCol2MaxMat
#'
#' This function takes a three vectors of equal length (source nodes, target nodes, and edge weights) and return the adjacency matrix as a sparse Matrix.
#' @param a (vector): vector of source node names.
#' @param b (vector): vector of target node names.
#' @param v (vector): vector of edge weights names.
#' @return sparce Matrix.
#' @keywords sparce matrix
#' @import Matrix
#' @export
#' @examples
#' threeCol2MaxMat(a = c("a","b","c","c"), b = c("a","b","b","b"), v = c(1,2,3,4))
threeCol2MaxMat<- function(a = c("a","b","c","c"), b = c("a","b","b","b"), v = c(1,2,3,4)){
#	library(Matrix)
	if(length(a)!=length(b) | length(a)!=length(v)){
		return(-1)
	}
	a = as.character(a)
	b = as.character(b)
	v = as.numeric(v)
	avals = sort(unique(a))
	bvals = sort(unique(b))

	nrows = length(avals)
	ncols = length(bvals)

	aidxs = match(a, avals)
	bidxs = match(b, bvals)

	vidxs = (bidxs-1)*nrows+aidxs

	retMat = Matrix(0, nrows, ncols, dimnames = list(avals,bvals), sparse=T)
	retMat[vidxs] = v
# arbitrarily selects one value if multiple values for same edge
	return(retMat)
}

#' threeCol2listMat
#'
#' This function takes a three vectors of equal length (source nodes, target nodes, and edge weights) and return the adjacency matrix as a list of vectors.
#' @param a (vector): vector of source node names.
#' @param b (vector): vector of target node names.
#' @param v (vector): vector of edge weights names.
#' @return list of vectors matrix representation.
#' @keywords sparce matrix
#' @export
#' @examples
#' threeCol2listMat(a = c("a","b","c","c"), b = c("a","b","b","b"), v = c(1,2,3,4))
threeCol2listMat<- function(a = c("a","b","c","c"), b = c("a","b","b","b"), v = c(1,2,3,4)){
	if(length(a)!=length(b) | length(a)!=length(v)){
		return(-1)
	}
	a1 = as.character(a)
	b1 = as.character(b)
	v1 = as.numeric(v)

	show("sorting...")
	ss = sort(a1, index.return=T)
	a2 = a1[ss$ix]
	b2 = b1[ss$ix]
	v2 = v1[ss$ix]

	avals = unique(a2)
	bvals = sort(unique(b2))
#	nvals = 10000  # for testing limit
	nvals = length(a2)
	nrows = length(avals)
	ncols = length(bvals)

	listm = list()
	colsums = 1:length(bvals)*0
	show("creating list...")
	curval = a2[1]
	idxstart = 1
	for (i in 2:nvals){
		if(i%%100000==1){show(i)}

		if(a2[i] != curval){
			# process values for row
			cols = b2[idxstart:(i-1)]
			vals = v2[idxstart:(i-1)]
# arbitrarily selects one value if multiple values for same edge
			listidxs = match(unique(cols), cols)
			colidxs = match(cols[listidxs],bvals)
			listm[[as.character(curval)]] = list("colidxs" = colidxs,"vals" = vals[listidxs])
			colsums[colidxs] = colsums[colidxs] + vals[listidxs]
			# update pointers
			curval = a2[i]
			idxstart = i
		}
	}
	# process values for end
	if(a2[i]==a2[i-1]){
		cols = b2[idxstart:i]
		vals = v2[idxstart:i]
# arbitrarily select one value if multiple values for same edge
		listidxs = match(unique(cols), cols)
		colidxs = match(cols[listidxs],bvals)
		listm[[as.character(curval)]] = list("colidxs" = colidxs,"vals" = vals[listidxs])
		colsums[colidxs] = colsums[colidxs] + vals[listidxs]
	}
	return(list("listm" = listm, "avals" = avals, "colsums"=colsums))
}


#' RWR
#'
#' This function runs a random walk with restart using two supported matrix representations.
#' @param boolSparceMat (bool): Boolean to indicate sparce Matrix or list matrix.
#' @param transmat (sparce Matrix / list matrix): transition probabilites.
#' @param restart (float): probability of restart.
#' @param query (vector): probability of restarting at all nodes.
#' @param startvec (vector): initial probability of being at any node.
#' @param maxiters (int): maximum number of allowable iterations.
#' @param thresh (float): threshold for L1 norm convergence.
#' @return list of 'iter':number of iterations, 'diff': L1 norm of difference, 'vec': converged probability distribution vector.
#' @keywords random walk with restart
#' @import Matrix
#' @export
#' @examples
#' RWR(boolSparceMat=TRUE, transmat=transmat, restart=.3, query=c(rep(0.1,10),rep(0,5)),
#'		startvec=rep(1/15,15), maxiters=10, thresh=0.001)
RWR<- function(boolSparceMat, transmat, restart, query, startvec, maxiters, thresh){
#	library(Matrix)
	damping = 1-restart
	query = query / sum(query)
	vec = startvec
	mycolsum = NULL
	mylistm = NULL
	mismatch = 0

	if(!boolSparceMat) {
		mycolsums = transmat$colsums
		mylistm = transmat$listm
		mismatch = sum(names(query)!=names(mylistm))
	} else {
		mismatch = sum(names(query)!=colnames(transmat))
	}

	if(mismatch>0){
		show("data names do not match")
		return(-1)
	}

	for (i in 1:maxiters){
		newvec = NULL
		if(boolSparceMat) {
			newvec = damping*transmat%*%vec+restart*query
		} else {
			rr = sapply(mylistm,function(x){vec[x$colidxs]%*%(x$vals/mycolsums[x$colidxs])})
			newvec = damping*rr+restart*query
		}
		diff = sum(abs(newvec-vec))
		vec = newvec
#		show(c(i,signif(diff,3)))
		if(diff < thresh) { break }
	}
	vec = as.vector(vec)
	names(vec) = names(query)
	return(list("iter" = i, "diff" = diff, "vec"=vec))
}


#' DRaWR
#'
#' This function runs the DRaWR two stage random walk with restart method.
#' @param possetfile (string): location of file containing location of gene sets to test.
#' @param unifile (string): location of file listing gene universe.
#' @param networkfile (string): location of file containing network contents.
#' @param outdir (string): prefix of location of file to write performance results (optionally prediction results).
#' @param restarts (vector): vector of restart values to test. Default is c(0.7).
#' @param nfolds (int): number of folds for cross validation, 1 is no cross-validation. Default is 4.
#' @param st2keep (int): number of property nodes to keep in second stage for each property type. Default is 50.
#' @param undirected (bool): boolean to make network undirected.
#' @param unweighted (bool): boolean to make network unweighted.
#' @param normalize (string): "type" or "none". Default is 'type'.
#' @param maxiters (int): maximum number of allowable iterations. Default is 50.
#' @param thresh (float): threshold for L1 norm convergence. Default is 0.001.
#' @param property_types (vector): list of possible property types. Default is c("go_curated_evidence", "go_inferred_evidence", "pfam_domain").
#' @param writepreds (boolean): write predictions out to a file. Default is FALSE
#' @keywords random walk with restart
#' @import Matrix
#' @importFrom Matrix colSums
#' @importFrom ROCR prediction
#' @importFrom ROCR performance
#' @export
#' @examples
#' DRaWR(possetfile = system.file("extdata", "sample.setlist", package="DRaWR"),
#'		unifile = system.file("extdata", "sample.uni", package="DRaWR"),
#'		networkfile = system.file("extdata", "sample.edge", package="DRaWR"),
#'		outdir = "exampleRun_", restarts = c(.7), nfolds = 1, st2keep = 1,
#'		undirected = TRUE, unweighted = FALSE, normalize = "type", maxiters = 50,
#'		thresh = 0.0001, property_types = c("T1", "T2"), writepreds = 0)
DRaWR<- function(possetfile = "extdata/sample.setlist", unifile = "extdata/sample.uni", networkfile = "extdata/sample.edge", outdir = "output_", restarts = c(.7), nfolds = 1, st2keep = 1, undirected = TRUE, unweighted = FALSE, normalize = "type", maxiters = 50, thresh = 0.0001, property_types = c("allen_brain_atlas", "chip_binding", "gene_ontology", "motif_u5", "pfam_domain", "T1", "T2"), writepreds = 0){

#	library(Matrix)
#	library(ROCR)

	# set up results values
	uni = tail(unlist(strsplit(unifile, "/")),1)
	uni = gsub(".txt","",uni)
	network = tail(unlist(strsplit(networkfile, "/")),1)
	network = gsub(".edge","",network)
	network = gsub(".txt","",network)
	possetname = tail(unlist(strsplit(possetfile, "/")),1)
	possetname = gsub(".txt","",possetname)
	weight = 'weight'
	if(unweighted){ weight = 'unweight' }
	directed = 'dir'
	if(undirected){ directed = 'undir' }

	# set up results output
	restable = NULL
	resfile = paste(sep="", outdir, uni, ".", network, ".", directed, ".", weight, ".", normalize, ".", maxiters, ".", thresh, ".", st2keep, ".", nfolds, ".", paste(collapse="_", restarts), ".", possetname, ".stats")
	if(file.exists(resfile)){
		show(c("WARNING: File already exists ", resfile));
		#return(1)
	}
	write.table(restable, resfile, quote=F, sep="\t", row.names=F)
	show(resfile)
	
	# read in edge file
	show('Reading Original File')
	edges = read.table(networkfile)
	colnames(edges) = c("src", "target", "weight","type")
	edges$weight <- as.numeric( as.character( edges$weight ) )

	# check for strange values, 0, negs, max
	wmin = min(edges$weight)
	wmins = edges[which(edges$weight==wmin),]
	show("min edges")
	show(wmins[1:min(3,length(wmins[,1])),])
	if(wmin <= 0){
		show("Invalid Edge Weights")
		return(-1)
	}
	wmax = max(edges$weight)
	wmaxs = edges[which(edges$weight==wmax),]
	show("max edges")
	show(wmaxs[1:min(3,length(wmaxs[,1])),])

	# collect info about feature nodes
	all_etypes = as.character(unique(edges$type))
	prop_etypes = intersect(all_etypes, property_types)
	ntypes = length(prop_etypes)
	features = unique(edges[which(edges$type %in% prop_etypes),c("src","type")])
	featnodes = as.character(features[,"src"])
	nfeats = length(featnodes)
	nkeep = min(st2keep*ntypes,nfeats)
	show(c("num_prop_types:", ntypes, 'num_prop_nodes', nfeats))

	# make undirected
	if(undirected){
		show("Making Network Undirected")
		tmpsrc = c(as.character(edges$src), as.character(edges$target))
		tmptarget = c(as.character(edges$target), as.character(edges$src))
		tmpweight = c(edges$weight, edges$weight)
		tmptype = c(as.character(edges$type), as.character(edges$type))
		edges = data.frame(tmpsrc, tmptarget, tmpweight, tmptype)
		colnames(edges) = c("src", "target", "weight","type")
	}

	# make unweighted
	if(unweighted){
		show("Removing Edge Weights")
		edges[,"weight"] = 1
	}

	# resolve node duplication within edge_types
	show("Removing (n1,n2,et) Duplicates")
	edges = aggregate(x = edges$weight, by = list(edges$src,edges$target,edges$type), FUN=max)[,c(1,2,4,3)]
	colnames(edges) = c("src", "target", "weight","type")

	# edge type normalization
	if(normalize == "type"){
		show("Edge Type Normalization")
		typetable = aggregate(weight~type, data = edges, FUN=sum)
 		show(typetable)
		rownames(typetable) = as.character(typetable[,1])
		edges$weight = edges$weight / typetable[as.character(edges$type),"weight"]
	}

	# resolve node duplication across edge_types

	#output normalized matrix
	if(FALSE){
		write.table(edges, paste(networkfile,".clean"), quote=F, sep="\t", col.names=F, row.names=F)
	}

	# calculate what matrix format is need (sparce matrix or personal list format)
	forcelarge = 0
	node_estimate = length(unique(c(as.character(edges[,1]),as.character(edges[,2]))))
	boolSparceMat = (node_estimate^2 < .Machine$integer.max) * (1-forcelarge)   # will fit in sparce mat

	# create adjacency matrix
	transmat = NULL
	nodenames = NULL
	colsum = NULL

	if(boolSparceMat) {
		# convert edgelist to matrix
		n1Matrix = threeCol2MaxMat( as.character(edges[,"src"]),  as.character(edges[,"target"]), as.numeric(edges[,"weight"]))
		nodenames = rownames(n1Matrix)
		# column normalize
		colsum = colSums(n1Matrix)
		transmat = t(t(n1Matrix)/colsum)
		rm(n1Matrix)
	} else {  # must use personal list format
		ll = threeCol2listMat( as.character(edges[,"src"]),  as.character(edges[,"target"]), as.numeric(edges[,"weight"]))
		transmat = ll
		nodenames = ll$avals
		colsum = ll$colsums
		rm(ll)
	}
	nnodes = length(nodenames)

	# Stage 0
	# read gene universe file
	universe = read.table(unifile)
	rownames(universe) = as.character(universe[,1])
	if(dim(universe)[2]<2){
		universe = cbind(as.character(universe[,1]), rep(1, length(universe[,1])) )
	}
	uniIDs = sort(intersect(nodenames, as.character(unique(universe[,1]))))

	if(length(uniIDs)<1){ return(-1)}

	for (restart in restarts){
		# restart = 0.3
		# store results in table
		evaltabstart = cbind(nodenames,-1,0)
		colnames(evaltabstart) = c("node", "type", "universe")
		rownames(evaltabstart) = nodenames
		evaltabstart[uniIDs,"universe"] = 1

		# store node type info in table
		evaltabstart[as.character(features[,1]),"type"] = as.character(features[,2])

		# run baseline rwr network
		basefile = paste(sep="", outdir, uni, ".", network, ".", directed, ".", weight, ".", normalize, ".", maxiters, ".", thresh, ".", restart, ".base")
		show(basefile)

		blankvec = structure(rep(0,nnodes), names = nodenames)
		startvec = blankvec + 1 / nnodes
		biter=0
		if(!file.exists(basefile)){
			query = blankvec
			midxs = match(uniIDs, nodenames)
			query[midxs] = as.numeric(universe[uniIDs,2])

			rwr_res = RWR(boolSparceMat, transmat, restart, query, startvec, maxiters, thresh)

			biter = rwr_res$iter
			evaltabstart = cbind(evaltabstart,as.numeric(rwr_res$vec))
			colnames(evaltabstart)[4] = "baseline"

			if(!file.exists(basefile)){
				write.table(evaltabstart, basefile, quote=F, sep="\t", row.names=T, col.names=NA)
			}
		} else { # read in base results into startvec
			evaltabstart = read.table(basefile)
		}

		startvec[nodenames] = as.numeric(evaltabstart[nodenames,"baseline"])  #starting distribution at baseline may speed convergence

		# read positive set names
		tmpname = tail(unlist(strsplit(possetfile, "/")),1)
		possetpref = gsub(tmpname,"",possetfile)
		possets = as.character(read.table(possetfile)[,1])

		for (qfile in possets){
			#qfile = possets[1]
			queryfile = paste(sep="/", possetpref, qfile)

			posset = tail(unlist(strsplit(queryfile, "/")),1)
			posset = gsub(".txt","",posset)

			show(queryfile)
			if(file.info(queryfile)$size == 0){show(c("Empty file ", queryfile)); next}

			# Stage 1
			# read query file
			query_gs = read.table(queryfile)
			if(dim(query_gs)[2]<2){
				query_gs = cbind(as.character(query_gs[,1]), rep(1, length(query_gs[,1])) )
			}
			rownames(query_gs) = as.character(query_gs[,1])
			queryIDs = sort(intersect(uniIDs, as.character(unique(query_gs[,1]))))
			nquery = length(queryIDs)

			set.seed(041185)
			folds = rep(0, nquery)
			if(nfolds>1){
				folds = sample(cut(seq(1,nquery),breaks=nfolds,labels=FALSE))
			}

			for(iter in 1:nfolds){
				# iter = 1
				## read query set
				outfile = paste(sep="", outdir, uni, ".", network, ".", directed, ".", weight, ".", normalize, ".",  maxiters, ".", thresh, ".", st2keep, ".", nfolds, ".", restart, ".", posset, ".", iter, ".rwr")
				show(outfile)
				if(file.exists(outfile)){
					show(c("WARNING: File already exists ", outfile));
					#next;
				}

				## separate training and testing
				train_idxs = which(folds!=iter)
				train_nidxs = queryIDs[train_idxs]
				ntrain = length(train_nidxs)

				test_idxs = which(folds==iter)
				testuni = as.character(setdiff(uniIDs,train_nidxs))
				if(nfolds==1){
					test_idxs = train_idxs
					testuni = as.character(uniIDs)
				}
				test_nidxs = queryIDs[test_idxs]
				ntest = length(test_nidxs)
				ntestuni = length(testuni)

				if(ntrain*ntest*1.0*ntestuni==0){ ## either no training or testing examples
					row1 = c(network, directed, weight, normalize, uni, restart, maxiters, thresh, st2keep, posset, nfolds, iter, "baseline", -1000, -1000, -1000)
					row2 = c(network, directed, weight, normalize, uni, restart, maxiters, thresh, st2keep, posset, nfolds, iter, "stage1", -1000, -1000, -1000)
					row3 = c(network, directed, weight, normalize, uni, restart, maxiters, thresh, st2keep, posset, nfolds, iter, "diff", -1000, -1000, -1000)
					row4 = c(network, directed, weight, normalize, uni, restart, maxiters, thresh, st2keep, posset, nfolds, iter, "stage2", -1000, -1000, -1000)
					restable = rbind(restable, row1, row2, row3, row4)
					next
				}

				# run RWR on training set
				query = blankvec
				midxs = match(train_nidxs, nodenames)
				query[midxs] = as.numeric(query_gs[train_nidxs,2])
				rwr_res = RWR(boolSparceMat, transmat, restart, query, startvec, maxiters, thresh)

				# store results
				qiter = rwr_res$iter
				evaltab = cbind(evaltabstart,0,0,as.numeric(rwr_res$vec[nodenames]))
				colnames(evaltab) = c("node", "type", "universe", "baseline", "train", "test", "stage1")
				diff = as.numeric(evaltab[,"stage1"]) - as.numeric(evaltab[,"baseline"])
				evaltab = cbind(evaltab,diff)
				evaltab[train_nidxs,"train"] = 1
				evaltab[test_nidxs,"test"] = 1

				# eval of baseline rwr on pos set
				model = prediction(as.numeric(evaltab[testuni,"baseline"]), evaltab[testuni,"test"])
				auc  = performance(model, "auc")
				perf = performance(model,"tpr","fpr")
				aucval = round(as.numeric(slot(auc, "y.values")),3)
				row	= c(network, directed, weight, normalize, uni, restart, maxiters, thresh, st2keep, posset, nfolds, iter, "baseline", aucval, 0, length(uniIDs))
				show(paste(collapse=' ', row[12:15]))
				restable = rbind(restable, row)

				# eval of query rwr on pos set
				model = prediction(as.numeric(evaltab[testuni,"stage1"]), evaltab[testuni,"test"])
				auc  = performance(model, "auc")
				perf = performance(model,"tpr","fpr")
				aucval = round(as.numeric(slot(auc, "y.values")),3)
				row = c(network, directed, weight, normalize, uni, restart, maxiters, thresh, st2keep, posset, nfolds, iter, "stage1", aucval, qiter, ntrain)
				show(paste(collapse=' ', row[12:15]))
				restable = rbind(restable, row)
				#plot(perf, lwd = 5)

				# eval of diff rwr on pos set
				model = prediction(as.numeric(evaltab[testuni,"diff"]), evaltab[testuni,"test"])
				auc  = performance(model, "auc")
				perf = performance(model,"tpr","fpr")
				aucval = round(as.numeric(slot(auc, "y.values")),3)
				row = c(network, directed, weight, normalize, uni, restart, maxiters, thresh, st2keep, posset, nfolds, iter, "diff", aucval, 0, ntrain)
				show(paste(collapse=' ', row[12:15]))
				restable = rbind(restable, row)
				#plot(perf, lwd = 5)

				colnames(restable) = c("network", "direct", "weight", "normalize", "uni", "restart", "maxiters", "thresh", "st2keep", "posset", "nfolds", "iter", "stage", "aucval", "rwr_iters", "ntrain")
				write.table(restable, resfile, quote=F, sep="\t", row.names=F)

				# stage 2
				# can skip in no feature nodes
				if(nfeats==0){
					row4 = c(network, directed, weight, normalize, uni, restart, maxiters, thresh, st2keep, posset, nfolds, iter, "stage2", -1000, -1000, -1000)
					restable = rbind(restable, row4)
					write.table(restable, resfile, quote=F, sep="\t", col.names=F, row.names=F)
					if(iter == 1 && writepreds){
						write.table(evaltab, outfile, quote=F, sep="\t", row.names=F, col.names=T)
					}
					next;
				}

				# extract best features nodes
				keep = rep(-1, nnodes)
				evaltab = cbind(evaltab, keep)
				ss = sort(as.numeric(evaltab[featnodes,"diff"]), decreasing=T, index.return = T)
				sortedfeats = featnodes[ss$ix]
				keepfeats = sortedfeats[1:nkeep]
				evaltab[featnodes,"keep"] = 0
				evaltab[keepfeats,"keep"] = 1

				# keep all non-property edges
				newedges = edges[which(edges$type %in% setdiff(all_etypes, prop_etypes)),]

				# add in edges connected to kept features
				keepidxs = which(edges[,"src"] %in% keepfeats | edges[,"target"] %in% keepfeats)
				newedges = rbind(newedges, edges[keepidxs,])

				tmpnames = unique(c(as.character(newedges[,1]),as.character(newedges[,2])))
				train_nidxs2 = intersect(train_nidxs,tmpnames)
				test_nidxs2 = intersect(test_nidxs,tmpnames)
				testuni2 = intersect(testuni,tmpnames)

				# can skip if no overlap with the train or test set
				if(length(train_nidxs2)*length(test_nidxs2)*1.0*length(testuni2)==0){
					row4 = c(network, directed, weight, normalize, uni, restart, maxiters, thresh, st2keep, posset, nfolds, iter, "stage2", -1000, -1000, -1000)
					restable = rbind(restable, row4)
					write.table(restable, resfile, quote=F, sep="\t", col.names=F, row.names=F)
					if(iter == 1 && writepreds){
						write.table(evaltab, outfile, quote=F, sep="\t", row.names=F, col.names=T)
					}
					next;
				}

				# edge type renormalized
				typetable2 = NULL
				if(normalize=="type"){
					typetable2 = aggregate(weight~type, data = newedges, FUN=sum)
					#show(typetable2)
					rownames(typetable2) = as.character(typetable2[,1]	)
					typetable2[as.character(newedges$type),"weight"]
					newedges$weight = newedges$weight / typetable2[as.character(newedges$type),"weight"]
					#show(newedges[1:5,])
				}

				transmat2 = NULL
				nodenames2 = NULL
				colsum2 = NULL
				# check if sparce matrix will work
				boolSparceMat2 = (length(tmpnames)^2 < .Machine$integer.max) * (1-forcelarge)  # will fit in sparce mat
				if(boolSparceMat2) {
					# convert edgelist to matrix
					n2Matrix = threeCol2MaxMat( as.character(newedges[,"src"]),  as.character(newedges[,"target"]), as.numeric(newedges[,"weight"]))
					nodenames2 = rownames(n2Matrix)
					# column normalize
					colsum2 = colSums(n2Matrix)
					transmat2 = t(t(n2Matrix)/colsum2)
					rm(n2Matrix)
				} else {  # must use personal list format
					ll2 = threeCol2listMat( as.character(newedges[,"src"]),  as.character(newedges[,"target"]), as.numeric(newedges[,"weight"]))
					transmat2 = ll2
					nodenames2 = ll2$avals
					colsum2 = ll2$colsums
					rm(ll2)
				}
				nnodes2 = length(nodenames2)
				rm(newedges)

				# run RWR on s2 network on training set
				blankvec2 = structure(rep(0,nnodes2), names = nodenames2)
				query2 = blankvec2
				midxs = match(train_nidxs2, nodenames2)
				query2[midxs] = as.numeric(query_gs[train_nidxs2,2])
				startvec2 = blankvec2 + 1 / nnodes2
				rwr_res = RWR(boolSparceMat2, transmat2, restart, query2, startvec2, maxiters, thresh)

				# store results
				q2iter = rwr_res$iter
				stage2 = rep(0,nnodes)
				evaltab = cbind(evaltab, stage2)
				evaltab[nodenames2,"stage2"] =  rwr_res$vec[nodenames2]

				# eval of stage2 rwr on pos set
				model = prediction(as.numeric(evaltab[testuni2,"stage2"]), evaltab[testuni2,"test"])
				auc  = performance(model, "auc")
				perf = performance(model,"tpr","fpr")
				aucval = round(as.numeric(slot(auc, "y.values")),3)
				row	= c(network, directed, weight, normalize, uni, restart, maxiters, thresh, st2keep, posset, nfolds, iter, "stage2", aucval, q2iter, length(train_nidxs2))
				show(paste(collapse=' ', row[12:15]))
				restable = rbind(restable, row)
				write.table(restable, resfile, quote=F, sep="\t", row.names=F)

				if(iter == 1 && writepreds){
					write.table(evaltab, outfile, quote=F, sep="\t", row.names=F, col.names=T)
				}

			} #end iter
		} #end queryset

		if(outdir == "exampleRun_"){
			unlink(basefile)
		}	
	} #end restart

	if(outdir == "exampleRun_"){
		unlink(resfile)
	}	
} #end function


#' Sample transition matrix.
#'
#' A Matrix containing tthe normalized transition matrix from the test network
#'
#' @format a Matrix containing the normalized transition matrix from the test network.
"transmat"

Try the DRaWR package in your browser

Any scripts or data that you put into this service are public.

DRaWR documentation built on March 24, 2022, 5:07 p.m.