R/matchLabels.R

Defines functions matchLabels overlapTable

Documented in matchLabels overlapTable

# Relabel the labels in source such that modules with high overlap with those in reference will have the
# same labels


overlapTable = function(labels1, labels2, na.rm = TRUE, ignore = NULL,
                        levels1 = NULL, levels2 = NULL, log.p = FALSE)
{
  labels1 = as.vector(labels1);
  labels2 = as.vector(labels2);
  if (length(labels1)!=length(labels2)) stop("Lengths of 'labels1' and 'labels2' must be the same.")
  if (na.rm)
  {
    keep = !is.na(labels1) & !is.na(labels2);
    labels1 = labels1[keep];
    labels2 = labels2[keep];
  }
  if (is.null(levels1)) 
  {
    levels1 = sort(unique(labels1));
    levels1 = levels1[!levels1 %in%ignore];
  }
  if (is.null(levels2))
  {
    levels2 = sort(unique(labels2));
    levels2 = levels2[!levels2 %in%ignore];
  }
  n1 = length(levels1);
  n2 = length(levels2);
  countMat = matrix(0, n1, n2);
  pMat = matrix(0, n1, n2);
  expected = matrix(0, n1, n2);
  nAll = length(labels1);
  if (n1 > 0 && n2 > 0) for (m1 in 1:n1)
    for (m2 in 1:n2)
    {
      m1Members = (labels1 == levels1[m1]);
      m2Members = (labels2 == levels2[m2]);
      .n1 = sum(m1Members);
      .n2 = sum(m2Members);
      .n12 = sum(m1Members & m2Members);
      #tab = .table2.allLevels(m1Members, m2Members, levels.x = c(FALSE, TRUE), levels.y = c(FALSE, TRUE));
      #print(paste("table for levels", levels1[m1], levels2[m2]));
      #print(table(m1Members, m2Members));
      #pMat[m1, m2] = fisher.test(tab, alternative = "greater")$p.value;
      pMat[m1, m2] = if (.n12 > 0) phyper(.n12-1, .n1, nAll - .n1, .n2, lower.tail = FALSE, log.p = log.p) else 1-log.p;
      countMat[m1, m2] = .n12;
      expected[m1, m2] = .n1 * .n2/nAll
    }
 dimnames(pMat) = dimnames(countMat) = dimnames(expected) = list(levels1, levels2);
 fraction = countMat/expected;
 fraction[expected==0] = 0;
 pMat[is.na(pMat)] = 1;

 list(countTable = countMat, pTable = pMat, expected = expected, fraction = fraction);
}
 

matchLabels = function(source, reference, pThreshold = 5e-2, na.rm = TRUE,
                       ignoreLabels = if (is.numeric(reference)) 0 else "grey", 
                       extraLabels = if (is.numeric(reference)) c(1:1000) else standardColors())
{
  source = as.matrix(source);
  if (nrow(source)!=length(reference))
    stop("Number of rows of 'source' must equal the length of 'reference'.");

  result = array(NA, dim = dim(source));
  #refMods = as.numeric(sort(unique(reference)));
  #refMods = refMods[!refMods %in% ignoreLabels];
  for (col in 1:ncol(source))
  {
    src = source[, col]
    tab = overlapTable(src, reference, na.rm = na.rm, ignore = ignoreLabels);
    if (any(dim(tab$countTable)==0))
    {
      result[, col] = src;
      next;
    }
    pTab = tab$pTable;
    pOrder = apply(pTab, 2, order);
    bestOrder = order(apply(pTab, 2, min));

    refMods = colnames(pTab);
    if (is.numeric(reference)) refMods = as.numeric(refMods);
    sourceMods = rownames(pTab);
    newLabels = rep(NA, length(sourceMods));
    names(newLabels) = sourceMods;
    for (rm in 1:length(bestOrder)) 
    {
      bestInd = 1;
      done = FALSE;
      #printFlush(paste("Looking for best match for reference module ", refMods[bestOrder[rm]]));
      while (!done && bestInd < length(sourceMods))
      {
        bm = pOrder[bestInd, bestOrder[rm]];
        bp = pTab[bm, bestOrder[rm]];
        if (bp > pThreshold)
        {
            done = TRUE;
        } else if (is.na(newLabels[bm])) {
            #newLabels[bm] = as.numeric(refMods[bestOrder[rm]]);
            #printFlush(paste("Labeling old module ", sourceMods[bm], "as new module",
            #                 refMods[bestOrder[rm]], "with p=", bp));
            newLabels[bm] = refMods[bestOrder[rm]];
            done = TRUE;
        }
        bestInd = bestInd + 1;
      }
    }
    if (length(ignoreLabels) > 0)
    {
      newLabels.ignore = ignoreLabels;
      names(newLabels.ignore) = ignoreLabels;
      newLabels = c(newLabels.ignore, newLabels);
    }

    unassigned = src %in% names(newLabels)[is.na(newLabels)];
    if (any(unassigned))
    {
      unassdSrcTab = table(src[!src %in% names(newLabels)]);
      unassdRank = rank(-unassdSrcTab, ties.method = "first");

      nExtra = sum(is.na(newLabels));
      newLabels[is.na(newLabels)] = extraLabels[ !extraLabels %in% 
                                       c(refMods, ignoreLabels, names(newLabels))] [1:nExtra]; 
    }

    result[, col] = newLabels[match(src, names(newLabels))];
  }

  result;
}
    
          
          
        
      

Try the WGCNA package in your browser

Any scripts or data that you put into this service are public.

WGCNA documentation built on Jan. 22, 2023, 1:34 a.m.