Nothing
#' Removes missing values (rows and column to obtain a large full matrix)
#' @export
#' @description Removes missing values (rows and column alternatively) to obtain a large full matrix
#' @param X the dataset (matrix) with missing values
#' @param nbstep number of cutting steps (may remove several rows or columns at each step)
#' @param std (boolean) remove constant covariates
#' @param verbose (boolean) to print the result
#' @param coercing vector of the covariates to keep (names or index)
#' @param Xout (boolean) to export or not the reduced matrix (if not, indices are sufficient)
#' @return
#' \item{individus_restants}{Index of remaining individuals}
#' \item{variables_restantes}{Index of remaining variables}
#' \item{X}{If Xout=TRUE, the reduced dataset without missing values}
#' @examples
#' data<-mtcars
#' require(CorReg)
#' datamiss=Terminator(target = data,wrath=0.05)#5% of missing values
#' datamiss
#' showdata(datamiss)#plot positions of the missing values
#' reduced=Conan(X=datamiss)
#' reduced
Conan<-function(X=X,nbstep=Inf,std=FALSE,verbose=FALSE,coercing=NULL,Xout=TRUE){
steploc=1
nloc=nrow(X)
ploc=ncol(X)
n=nloc
p=ploc
individus_restants=1:n
variables_restantes=1:p
if(!is.null(coercing)){#on a des variables a garder
#on va purger les individus manquants sur ces variables (pas le choix)
for(i in 1:length(coercing)){
if(is.numeric(coercing[i])){
j=coercing[i]
}else{
j=which(names(X)==coercing[i])[1] #si doublon on prend le premier
}
if(anyDuplicated(c(1:ploc,j))>0){#si on ne tape pas a cote
quimank=which(is.na(X[,j]))
if(length(quimank)>0){
X=X[-quimank,]
individus_restants=individus_restants[-quimank]
}
}
}
nloc=nrow(X)
ploc=ncol(X)
}
#nbmank/prod(dim(basefull))
if(nbstep==Inf){
nbstep=max(c(nloc,ploc))
}
if(verbose){cat(dim(X))}
if(std){#nettoyage des constantes si demande
sd_loc<-function(vect){
return(sd(vect[!is.na(vect)]))
}
quiconst=which(apply(X,2,sd_loc)==0)
if(length(quiconst)>0){
X=X[,-quiconst]
variables_restantes=variables_restantes[-quiconst]
}
}
if(verbose){cat(dim(X))}
quimank=which(is.na(X),arr.ind = TRUE)
nbmank=dim(quimank)[1]
M=Matrix::Matrix(0,nrow = nrow(X),ncol=ncol(X))
M[quimank]=1
while (steploc<=nbstep & nbmank>0 & nloc>0 & ploc>0){
if(verbose){cat(steploc)}
nbmank_var=colSums(M)/nloc#taux=nbmank colonne/ nombre de ligne
nbmank_ind=rowSums(M)/ploc
if(max(nbmank_var)>max(nbmank_ind)){#on supprime variable
quisuppr=which(nbmank_var==max(nbmank_var))
X=X[,-quisuppr]
M=M[,-quisuppr]
variables_restantes=variables_restantes[-quisuppr]
}else{#on supprime ligne
if(verbose){cat("lign")}
quisuppr=which(nbmank_ind==max(nbmank_ind))
if(length(quisuppr)>0){#si il y a quelque chose a supprimer
X=X[-quisuppr,]
M=M[-quisuppr,]
individus_restants=individus_restants[-quisuppr]
if(verbose){cat(dim(X))}
if(std){#nettoyage des constantes si demande
quiconst=which(apply(X,2,sd_loc)==0)
if(length(quiconst)>0){
X=X[,-quiconst]
variables_restantes=variables_restantes[-quiconst]
}
}
}
}
if(verbose){cat(dim(X)) }
nloc=nrow(X)
ploc=ncol(X)
quimank=which(is.na(X),arr.ind = TRUE)
nbmank=dim(quimank)[1]
steploc=steploc+1
}
if(verbose){
cat(paste("I've killed ",n-nloc,"individuals and ",p-ploc," covariates."))
}
if(Xout){
return(list(X=X,individus_restants=individus_restants,variables_restantes=variables_restantes))
}else{
return(list(individus_restants=individus_restants,variables_restantes=variables_restantes))
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.