# R/mixedVarSim.R

#' @export
#' @name mixedVarSim
#' @title Similarity between two variables
#' @description Returns the similarity between two quantitative variables, two
#' qualitative variables or a quantitative variable and a qualitative variable.
#' The similarity between two variables is defined as a square cosine: the
#' square of the Pearson correlation when the two variables are quantitative;
#' the correlation ratio when one variable is quantitative and the other one is
#' qualitative; the square of the canonical correlation between two sets of
#' dummy variables, when the two variables are qualitative.
#' @param X1 a vector or a factor
#' @param X2 a vector or a factor, of the same length as \code{X1}
#' @return The similarity between \code{X1} and \code{X2}: a single numeric
#' value (returned as a 1 x 1 matrix when both variables are quantitative).
#' @examples
#' x <- c(1, 2, 3, 4, 5, 6)
#' y <- c(2, 1, 4, 3, 6, 5)
#' f <- factor(c("a", "a", "b", "b", "c", "c"))
#' g <- factor(c("u", "v", "u", "v", "v", "u"))
#' mixedVarSim(x, y)  # quantitative / quantitative
#' mixedVarSim(x, f)  # quantitative / qualitative
#' mixedVarSim(f, g)  # qualitative / qualitative
mixedVarSim <- function(X1, X2) {
  n <- length(X1)
  # Fail early with a readable message instead of a "non-conformable
  # arguments" error raised deep inside the matrix products below.
  if (length(X2) != n) {
    stop("'X1' and 'X2' must have the same length", call. = FALSE)
  }

  # Correlation ratio between a quantitative and a qualitative variable:
  # weighted sum of the squared per-category means of the recoded
  # quantitative variable (recodquant()$Z is presumably the standardized
  # variable -- see the PCAmixdata documentation).
  correlation_ratio <- function(quanti, quali) {
    Z <- PCAmixdata::recodquant(quanti)$Z
    G <- PCAmixdata::recodqual(quali)
    ns <- colSums(G)              # category counts
    A <- crossprod(G, Z) / ns     # per-category means of Z
    sum(A^2 * ns / n)
  }

  # Indicator (dummy) matrix of a qualitative variable, each column divided
  # by the square root of its category proportion.
  scaled_indicators <- function(quali) {
    G <- PCAmixdata::recodqual(quali)
    ps <- colSums(G) / n
    sweep(G, MARGIN = 2, STATS = sqrt(ps), FUN = "/")
  }

  num1 <- is.numeric(X1)
  num2 <- is.numeric(X2)

  if (num1 && num2) {
    # quantitative / quantitative: squared correlation of the recoded variables
    Z1 <- PCAmixdata::recodquant(X1)$Z
    Z2 <- PCAmixdata::recodquant(X2)$Z
    sim <- (crossprod(Z1, Z2) / n)^2
  } else if (num1) {
    # quantitative / qualitative
    sim <- correlation_ratio(X1, X2)
  } else if (num2) {
    # qualitative / quantitative
    sim <- correlation_ratio(X2, X1)
  } else {
    # qualitative / qualitative: canonical analysis of the two dummy sets
    Y1 <- scaled_indicators(X1)
    Y2 <- scaled_indicators(X2)
    r <- ncol(Y1)
    s <- ncol(Y2)
    # Solve the eigenproblem in whichever formulation has the smallest
    # dimension: n x n, r x r or s x s.
    m <- which.min(c(n, r, s))
    if (m == 1) {
      V <- (tcrossprod(Y1) / n) %*% (tcrossprod(Y2) / n)
    } else {
      V12 <- crossprod(Y1, Y2) / n
      V21 <- t(V12)
      V <- if (m == 2) V12 %*% V21 else V21 %*% V12
    }
    # The second eigenvalue is used; the leading one is expected to be the
    # trivial eigenvalue associated with the constant vector.
    sim <- Re(eigen(V)$values[2])
  }
  sim
}

# Try the ClustOfVar package in your browser

# Any scripts or data that you put into this service are public.

# ClustOfVar documentation built on May 2, 2019, 12:37 p.m.