Nothing
#library(Rcpp)
#sourceCpp("C:/Users/Quentin/Documents/article_imp4p/article/correlation_mod2.cpp")
#sourceCpp("C:/Users/Quentin/Documents/article_imp4p/article/fast_apply_nb_na.cpp")
#sourceCpp("C:/Users/Quentin/Documents/article_imp4p/article/fast_apply_nb_not_na.cpp")
#sourceCpp("C:/Users/Quentin/Documents/article_imp4p/article/fast_apply_sd_na_rm_T.cpp")
#sourceCpp("C:/Users/Quentin/Documents/article_imp4p/article/fast_apply_sum_na_rm_T.cpp")
#sourceCpp("C:/Users/Quentin/Documents/article_imp4p/article/fast_sim.cpp")
#Function to impute data sets with the SLSA algorithm
impute.slsa=function(tab, conditions, repbio=NULL, reptech=NULL, nknn=30, selec="all", weight="o", ind.comp=1, progress.bar=TRUE){
if (is.null(repbio)){repbio=as.factor(1:length(conditions));}
if (is.null(reptech)){reptech=as.factor(1:length(conditions));}
if (nknn>sqrt(nrow(tab))){nknn=floor(sqrt(nrow(tab)));warning(paste0("The chosen number of nearest neighbours is too high. It has been fixed to the maximum value (floor(sqrt(nrow(tab))): ",floor(sqrt(nrow(tab)))));}
if (nknn<=5){warning("The chosen number of nearest neighbours is low (<=5).");}
if (ncol(tab)<2){warning("The number of columns (samples) has to be superior to 1 in tab.");}
tab_imp=as.matrix(tab);
new_tab=tab
new_tab.imp=tab_imp
new_conditions=NULL
new_repbio=NULL
new_reptech=NULL
index=NULL
k=1
for (j in 1:length(levels(conditions))){
index=c(index,which(conditions==levels(conditions)[j]))
nb_rep=sum((conditions==levels(conditions)[j]));
new_tab[,(k:(k+nb_rep-1))]=tab[,which(conditions==levels(conditions)[j])]
new_tab.imp[,(k:(k+nb_rep-1))]=tab_imp[,which(conditions==levels(conditions)[j])]
new_conditions=c(new_conditions,conditions[which(conditions==levels(conditions)[j])])
new_repbio=c(new_repbio,repbio[which(conditions==levels(conditions)[j])])
new_reptech=c(new_reptech,reptech[which(conditions==levels(conditions)[j])])
k=k+nb_rep
}
tab=new_tab
tab_imp=new_tab.imp
conditions=new_conditions
repbio=new_repbio
reptech=new_reptech
conditions=factor(as.character(conditions),levels=as.character(unique(conditions)));
repbio=factor(as.character(repbio),levels=as.character(unique(repbio)));
reptech=factor(as.character(reptech),levels=as.character(unique(reptech)));
nb_cond=length(levels(conditions));
nb_rep=rep(0,nb_cond);
k=1;
for (n in 1:nb_cond){
if (progress.bar==TRUE){
cat(paste("\n Imputation in condition ",n,"...\n In progress: "));
}
nb_rep[n]=sum((conditions==levels(conditions)[n]));
repb=repbio[(k:(k+nb_rep[n]-1))];
rept=reptech[(k:(k+nb_rep[n]-1))];
xincomplete1=tab_imp[,(k:(k+nb_rep[n]-1))];
#mediane normalisation to avoid gap effects in samples of a same condition
medxi1=apply(xincomplete1,2,median,na.rm=T)
for (j in 1:ncol(xincomplete1)){
xincomplete1[,j]=xincomplete1[,j]-medxi1[j]
}
asd=fast_apply_sd_na_rm_T(xincomplete1,1);#apply(xincomplete1,1,sd,na.rm=T);
#Imputation function for each row
imputeHLS=function(roww){
#Progress bar
if (progress.bar==TRUE){
if (roww[1]%/%(floor(0.1*nrow(xincomplete1)))==roww[1]/(floor(0.1*nrow(xincomplete1)))){cat(c(0.1*(roww[1]%/%(floor(0.1*nrow(xincomplete1))))*100,"% - "));}
}
xnbr=roww[1]
roww=roww[2:length(roww)];
naroww=is.na(roww);
# if at least one missing value else no imputation of the row
if (sum(naroww)!=0){
#if at least one observed value else no imputation of the row
if (sum(naroww)!=length(roww)){
row.exp=which(naroww);
prot=roww[-row.exp];
if (is.numeric(selec)){
xincomplete=xincomplete1[base::sample.int(n=nrow(xincomplete1),size=selec), ];
}else{
xincomplete=xincomplete1;
}
cand_x=xincomplete[, -row.exp, drop=F];
#similarity measures:
#euclidean distance between side-by-side observed values
sim=fast_sim(prot,cand_x);
sim=exp(-sqrt(sim));
osim=order(sim, decreasing=T);
#row.cand is the set of values from which linear models are fitted
#row.impcand is the set of values allowing to predict responses from fitted linear models
#At least nknn observed values are needed in each column of row.cand and row.impcand
Kmin=0;
kl=1;
ind.comp.temp=ind.comp
while (Kmin<(nknn)){
kl=kl+1;
row.idx=osim[2:(kl+1)];
row.r=sim[row.idx];
row.cand=cand_x[row.idx, , drop=F];
indcand=fast_apply_nb_na(row.cand,1);
indic=which(indcand!=length(row.cand[1,]));
if (ind.comp.temp==1){indic=which(indcand==0);}
row.idx=row.idx[indic];
row.r=row.r[indic];
row.cand=row.cand[indic,];
row.impcand=xincomplete[row.idx, row.exp, drop=F];
if (is.matrix(row.cand)){
nborc=fast_apply_nb_not_na(row.cand,2);
}else{nborc=sum(!is.na(row.cand));}
nboric=fast_apply_nb_not_na(row.impcand,2);
Kmin=min(c(nborc,nboric));
if (ind.comp.temp==1){
if (kl>=700){
kl=1;ind.comp.temp=0;
}
#if (ind.comp==0){Kmin=nknn;}
}
}
#Let's go to fit linear models and responses
#1) creating design matrices
rrepb=as.factor(as.numeric(repb[-row.exp]));
rrept=as.factor(as.numeric(rept[-row.exp]));
rrepb1=as.factor(as.numeric(repb[row.exp]));
rrept1=as.factor(as.numeric(rept[row.exp]));
lrb=levels(as.factor(as.numeric(repb)));
lrt=levels(as.factor(as.numeric(rept)));
llrb=length(lrb);
llrt=length(lrt);
mm=rep(1,length(prot));
mm1=rep(1,length(row.exp));
if (llrb>1){
for (ki in 2:llrb){
mm=cbind(mm,1*(rrepb==lrb[ki]));
mm1=cbind(mm1,1*(rrepb1==lrb[ki]));
}
}
if (llrt>1){
for (ki in 2:llrt){
mm=cbind(mm,1*(rrept==lrt[ki]));
mm1=cbind(mm1,1*(rrept1==lrt[ki]));
}
}
if (nrow(mm)>1){
mm[,c(FALSE,(apply(mm[,2:length(mm[1,])],2,sum)<=1))]=rep(0,length(mm[,1]));
}else{
mm[2:length(mm)][mm[2:length(mm)]<=1]=0;}
#2) compute matrix of responses (y)
nri=nrow(row.impcand);
nci=ncol(row.impcand);
y=matrix(0, nrow=nri, ncol=nci);
for(i in 1:nri) {
if (is.matrix(row.cand)){
mx=cbind(mm,row.cand[i,]);
ll=!is.na(row.cand[i,]);
mx=mx[ll,];
if (is.matrix(mx)){
lg=lm.fit(x=mx,y=prot[ll])$coefficients;
lg[is.na(lg)]=0;
#if (sum(1*ll)<3){lg=rep(0,length(lg));}
}else{lg=rep(0,length(mx));
lg[length(mx)]=prot[ll]/mx[length(mx)];
#if (sum(1*ll)<3){lg=rep(0,length(lg));}
}
}else{lg=rep(0,length(mm)+1);
lg[length(lg)]=prot/row.cand[i];
}
mx1=cbind(mm1,row.impcand[i,]);
y[i, ]=mx1%*%lg;
}
#delete outlier responses
for (j in 1:ncol(y)){
qyj=quantile(y[,j],na.rm=T);
bi=qyj[3]-0.5*(qyj[4]-qyj[2]);
bs=qyj[3]+0.5*(qyj[4]-qyj[2]);
y[y[,j]<bi,j]=NA;
y[y[,j]>bs,j]=NA;
}
#compute the weight function
if (is.numeric(weight)==TRUE){w=row.r^weight;}
if (weight=="o"){w=(row.r**2/(1-row.r**2+0.000001))**2;}
#final imputation by weighting the responses
roww[row.exp]<-apply(y, 2, function(x){xx=x[!is.na(x)];ww=w[!is.na(x)];ww[1:min(c(nknn,length(ww)))]=ww[1:min(c(nknn,length(ww)))]/sum(ww[1:min(c(nknn,length(ww)))]);sum(ww[1:min(c(nknn,length(ww)))]*xx[1:min(c(nknn,length(ww)))]);})
#to limit imputations in a reasonable neighborhood
l.ol=sum(roww[row.exp]<(mean(roww[-row.exp])-2*quantile(asd,probs=0.9,na.rm=T)));
l.ou=sum(roww[row.exp]>(mean(roww[-row.exp])+2*quantile(asd,probs=0.9,na.rm=T)));
if (l.ol>0){roww[row.exp][(roww[row.exp]<(mean(roww[-row.exp])-2*quantile(asd,probs=0.9,na.rm=T)))]=(mean(roww[-row.exp])-rnorm(length(roww[row.exp][(roww[row.exp]<(mean(roww[-row.exp])-2*quantile(asd,probs=0.9,na.rm=T)))]),mean=2,sd=0.005)*quantile(asd,probs=0.9,na.rm=T));};
if (l.ou>0){roww[row.exp][(roww[row.exp]>(mean(roww[-row.exp])+2*quantile(asd,probs=0.9,na.rm=T)))]=(mean(roww[-row.exp])+rnorm(length(roww[row.exp][(roww[row.exp]>(mean(roww[-row.exp])+2*quantile(asd,probs=0.9,na.rm=T)))]),mean=2,sd=0.005)*quantile(asd,probs=0.9,na.rm=T));};
}
}
return(roww);
}
xmiss = t(apply(cbind(seq_len(nrow(xincomplete1)),xincomplete1), 1, imputeHLS));
tab_imp[,(k:(k+nb_rep[n]-1))] = xmiss;
for (j in 1:ncol(tab_imp[,(k:(k+nb_rep[n]-1))])){
tab_imp[,(k:(k+nb_rep[n]-1))][,j]=tab_imp[,(k:(k+nb_rep[n]-1))][,j]+medxi1[j]
}
k=k+nb_rep[n];
}
tab_imp[,index]=tab_imp
return (tab_imp)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.