R/cv_method_single_MC_corr.R

cv_method_single_MC_corr <-
function(mu0, p, m, n, alpha_list, p1, ss, ntest, pcorr, chol.rho, sampling.p )
{
	data=gen_data_corr(mu0, p, m, n, pcorr, chol.rho, sampling.p )

	d1=subset(data$x, data$y==1)
	d2=subset(data$x, data$y==-1)
	n1=nrow(d1)
	n2=nrow(d2)

	cv_data=data
	# 10 fold CV
     K=10 #10 fold CV  unless one group has less than 10, then smaller fold:
     if(min(n1,n2)<10) K=min(n1,n2) 

     cv_ids1= sample(rep(1:K, ceiling(n1/K))[1:n1], n1, replace=F)
     cv_ids2= sample(rep(1:K, ceiling(n2/K))[1:n2], n2, replace=F)

	cv_data$cv_idx=c( cv_ids1,  cv_ids2)

	cv_pcc=sapply(alpha_list, get_cv_pcc, cv_data, p1)
	pvalue_threshold=alpha_list[which.max(cv_pcc)]

	mean1=apply(d1,2,mean)
	mean2=apply(d2,2,mean)
	xsq1=apply(d1^2,2,sum)
	xsq2=apply(d2^2,2,sum)
	s1= xsq1-n1*mean1^2
	s2= xsq2-n2*mean2^2
	pool.sd= sqrt((s1+s2)/(n-2))
	zscore= (mean1-mean2)/( pool.sd*sqrt(1/n1+1/n2))
	pvalue=2*pt(abs(zscore),df=n-2, lower.tail=F)
	weight=get_weight_zp(zscore, pvalue, pvalue_threshold)

	u=sum(weight[1:m])*mu0
	v=sum(abs(weight))

	testdata=gen_data_corr(mu0, p, m, ntest, pcorr, chol.rho)

	if (all(weight==0))
	{
		pred=rbinom(length(testdata$y), size=1, prob=p1)
	}
	else
	{
		cl_cutoff=1/2*log((1-p1)/p1)*dot(weight, weight)/dot(zscore/sqrt(nrow(data$x)), weight)
		pred=as.numeric((testdata$x %*% weight)>cl_cutoff)
	}
	
	# convert (0,1) to (-1, 1)
	pred=pred*2-1
	#mean(pred==testdata$y) 

if (ss==F)
        c(mean(pred==testdata$y) )
else
        n1test=ntest/2
        c(mean(pred==testdata$y), mean(pred[1:n1test]==testdata$y[1:n1test]), mean(pred[(n1test+1):ntest]==testdata$y[(n1test+1):ntest]) )  
}

Try the HDDesign package in your browser

Any scripts or data that you put into this service are public.

HDDesign documentation built on May 2, 2019, 6:41 a.m.