R/selectpin.R

Defines functions findpins findsmears containment.indicator

Documented in findpins findsmears

containment.indicator <- function(vstart, vend, wstart, wend){
  lw <- length(wstart)
  lv <- length(vstart)
  z <- cbind(c(vend,wend),c(1:lv,rep(0,lw)),c(rep(0,lv),1:lw))
  z<-z[order(z[,1]),]
  endbeforeend<-cummax(z[,2])[order(z[,3])][sort(z[,3])!=0]
  z<-cbind(c(wstart,vstart),c(rep((lv+1),lw),1:lv),c(1:lw,rep(0,lv)))
  z<-z[order(z[,1]),]
  startafterstart<-rev(cummin(rev(z[,2])))[order(z[,3])][sort(z[,3])!=0]
  return(cbind(startafterstart,endbeforeend))
}



#'Generate the smear tables.
#'
#'Given the breakpoint tables of all single cells generated by \code{preprocess_segfile},
#'extend the breakpoint bins into intervals containing several bins to
#'allow for the uncertainty inherent in segmentation.
#'@param breakpoint_table The breakpoint_table generated by \code{preprocess_segfile}.
#'@param smear An integer. The number of spanning bins on one side. Default value: 2.
#'@param keepboundaries Logical. If TRUE (Default), the bins on chromosome boundaries are kept.
#'@param mask_XY Logical. If TRUE (Default), sex chromosomes (X, Y) are masked.
#'@return smear_table. The cell names (profid), the chromosome (chrom) and bin location (bpstart, bpend)
#'for the extended intervals with length of 2*smear and the change direction (bpsign).
#'@export


findsmears <- function(breakpoint_table, smear = 2, keepboundaries = TRUE, mask_XY = TRUE){

  smearchar <- as.character(smear)

  ## censor the breakpoint table
  censored<-(breakpoint_table[,"chromstart"]>=dropareas[breakpoint_table[,"chrom"],"from"])&
    (breakpoint_table[,"chromend"]<=dropareas[breakpoint_table[,"chrom"],"to"])
  censoredtoo<-(which(censored)+1)[(which(censored)+1)<=nrow(breakpoint_table)]
  censored[censoredtoo]<-((breakpoint_table[censoredtoo,"chrom"]==breakpoint_table[censoredtoo-1,"chrom"])&
                            (breakpoint_table[censoredtoo,"profid"]==breakpoint_table[censoredtoo-1,"profid"]))|censored[censoredtoo]

  ## the smear table based on the breakpoint table
  smear_table <- cbind(breakpoint_table[, c("profid", "chrom")],
                   breakpoint_table[,"segstarts"] - smear, breakpoint_table[,"segstarts"] + smear,
                   sign(breakpoint_table[,"cvals"] - c(0, breakpoint_table[-nrow(breakpoint_table), "cvals"])))
  dimnames(smear_table)[[2]]<-c("profid", "chrom", "bpstart", "bpend", "bpsign")

  ## start and end seg(bin) location of each chromosome
  chrstart <- breakpoint_table[match(unique(breakpoint_table[,"chrom"]), breakpoint_table[,"chrom"]), "segstarts"]
  chrend <- c((chrstart - 1)[-1], breakpoint_table[nrow(breakpoint_table), "segends"])


  ## boundaries
  if(!keepboundaries){
    smear_table <- smear_table[((smear_table[,"bpstart"] + smear) > chrstart[smear_table[,"chrom"]])&!censored,]
    smear_table[smear_table[,"bpstart"] < chrstart[smear_table[,"chrom"]], "bpstart"] <-
      chrstart[smear_table[smear_table[,"bpstart"] < chrstart[smear_table[,"chrom"]], "chrom"]]
    smear_table[smear_table[,"bpend"] > chrend[smear_table[,"chrom"]], "bpend"] <-
      chrend[smear_table[smear_table[,"bpend"]>chrend[smear_table[,"chrom"]],"chrom"]]
  }

  if(keepboundaries){
    smear_table[(smear_table[,"bpstart"]+smear)==chrstart[smear_table[,"chrom"]],"bpsign"]<-
      2*smear_table[(smear_table[,"bpstart"]+smear)==chrstart[smear_table[,"chrom"]],"bpsign"]
    smear_table<-smear_table[!censored,]
  }


  ## X,Y chromosome
  if(mask_XY){
    chromrange = 1:22
  }else{
    chromrange = 1:24
  }
  smear_table <- smear_table[smear_table[, "chrom"]%in%chromrange,]


  return(smear_table)

}



#'Select features and generate the incidence table.
#'
#'Select features (called as pins), generate the binary matrix with rows as pins and columns as cells.
#'@param breakpoint_table The breakpoint_table generated by \code{preprocess_segfile}.
#'@param smear_table The smear_table generated by \code{findsmears}.
#'@return A list of three objects. pinmat is the incidence table. pins is the bin location and sign for
#'the selected features (pins).
#'@export


findpins <- function(breakpoint_table, smear_table){
  allsigns <- sort(unique(smear_table[, "bpsign"]))
  for(vsign in allsigns){
    a <- smear_table[smear_table[,"bpsign"] == vsign,]
    a <- a[order(a[,"bpend"]),]
    apins <- NULL
    while(nrow(a) > 0){
      apins <- c(apins, a[1, "bpend"])
      a <- a[!(a[,"bpstart"] <= apins[length(apins)] & a[,"bpend"] >= apins[length(apins)]),,drop = F]
    }
    a <- smear_table[smear_table[,"bpsign"] == vsign,]
    ci <- containment.indicator(apins, apins, a[order(a[,"bpend"]), "bpstart"], a[order(a[,"bpend"]), "bpend"])
    a <- cbind(a[order(a[,"bpend"]),], ci)
    dimnames(a)[[2]][(ncol(a)-1):ncol(a)]<-c("startpin","endpin")
    apinmat<-matrix(ncol=length(unique(breakpoint_table[,"profid"])),nrow=length(apins)+2,data=0,
                    dimnames=list(NULL,unique(breakpoint_table[,"profid"])))
    for(id in unique(a[,"profid"])){
      apinmat[a[a[,"profid"]==id,"startpin"]+1,id]<-1
      apinmat[a[a[,"profid"]==id,"endpin"]+2,id]<-apinmat[a[a[,"profid"]==id,"endpin"]+2,id]-1
    }
    apinmat<-apply(apinmat,2,cumsum)
    apinmat<-apinmat[-c(1,nrow(apinmat)),,drop=F]
    apins<-cbind(apins,rep(vsign,length(apins)))
    dimnames(apins)[[2]]<-c("bin","sign")
    if(vsign == min(allsigns)){
      pinmat <- apinmat
      pins <- apins
    }
    else{
      pinmat <-rbind(pinmat, apinmat)
      pins <- rbind(pins, apins)
    }
  }

  pins <- pins[(rowSums(pinmat) < ncol(pinmat)),,drop = F]
  pinmat <- pinmat[(rowSums(pinmat) < ncol(pinmat)),,drop = F]
  cell_names <- colnames(pinmat)
  return(list(pins = pins, pinmat = pinmat, cell_names = cell_names))

}




## tshort --> breakpoint_table
## dtshort --> smear_table
JunyanSong/SCclust documentation built on April 16, 2022, 8:44 p.m.