R/idmatchv2.R

idmatchv2 <-
function(m1,m2,id.col1,id.col2=id.col1){
#
#  Same as idmatch, but also return cases not matched
#
#  OUTPUT:
#  m combined data for which there are matching id's 
#  m1.no  data in m1 for which there are no matching id's in m2
#  m2.no  data in m2 for which there are no matching id's in m1
#
flag=!is.na(m1[,id.col1])
m1=m1[flag,]  # eliminate any rows where ID is missing
flag=!is.na(m2[,id.col1])
m2=m2[flag,]
M1=NULL
idnm1=NULL
ic1=0
idnm2=NULL
ic2=0
if(sum(duplicated(m1))>0)stop('Duplicate ids detected in m1')
if(sum(duplicated(m2))>0)stop('Duplicate ids detected in m2')
for(i in 1:nrow(m1)){
flag=duplicated(c(m1[i,id.col1],m2[,id.col2]))
if(sum(flag)==0){
ic1=ic1+1
idnm1[ic1]=i
}
if(sum(flag>0)){
if(is.data.frame(m1)){
if(!is.null(dim(M1)))M1=rbind(M1,as.data.frame(m1[i,]))
if(is.null(dim(M1)))M1=as.data.frame(m1[i,])
}
if(!is.data.frame(m1)){
if(!is.null(dim(M1)))M1=rbind(M1,m1[i,])
if(is.null(dim(M1)))M1=matrix(m1[i,],nrow=1)
}
}}
M2=NULL
for(i in 1:nrow(m2)){
flag=duplicated(c(m2[i,id.col2],m1[,id.col1]))
if(sum(flag)==0){
ic2=ic2+1
idnm2[ic2]=i
}
if(sum(flag>0)){
if(is.data.frame(m2)){
if(!is.null(dim(M2)))M2=rbind(M2,as.data.frame(m2[i,]))
if(is.null(dim(M2)))M2=as.data.frame(m2[i,])
}
if(!is.data.frame(m2)){
if(!is.null(dim(M2)))M2=rbind(M2,m2[i,])
if(is.null(dim(M2)))M2=matrix(m2[i,],nrow=1)
}
}}
m=cbind(M1[,id.col1],M1[,-id.col1],M2[,-id.col2])
nc1=ncol(m2)-1
m1u=NULL
if(!is.null(idnm1))m1u=m1[idnm1,]
m2u=NULL
if(!is.null(idnm2))m2u=m2[idnm2,]
list(m=m,idnm1=idnm1,idnm2=idnm2,m1.no=m1u,m2.no=m2u)
}
musto101/wilcox_R documentation built on May 23, 2019, 10:52 a.m.