R/functions.R

Defines functions matrixMultiEt main.subsetter inverse.matrix chip.selector read.iced filename.shortcut data.combine testf combine.list table.create matrixMultiE workspace.folder ratioF matrixE matrix.sparse ave.filter ave.mean.median color.bar ratio.color.bar HiC.Plot HiCRatio.Plot

Documented in ave.filter ave.mean.median chip.selector color.bar combine.list data.combine filename.shortcut HiC.Plot HiCRatio.Plot inverse.matrix main.subsetter matrixE matrixMultiE matrixMultiEt matrix.sparse ratio.color.bar ratioF read.iced table.create testf workspace.folder

#' Load a Matrix
#'
#' This function loads a file as a matrix. It assumes that the first column
#' contains the rownames and the subsequent columns are the sample identifiers.
#' Any rows with duplicated row names will be dropped with the first one being
#' kepted.
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export
HiCRatio.Plot <- function(plotData,plotTitle,arrowYes=0,offsetf = 1) {
  zoomC1 = zoomC/2
  plotData$change <- log2(plotData[,3])
plotData$colvalue <- ifelse(plotData$change<0,-plotData$change, plotData$change)
rbPal <- colorRampPalette(c("white","blue"))
rbPal1 <- colorRampPalette(c("white","red"))
plotData$colvalue <- ifelse(plotData$colvalue<=ratioSBR, plotData$colvalue, ratioSBR)  #makes color values above the max = to the max range
plotData$colB <- ifelse(plotData$change<=0, plotData$colvalue, 0)
plotData$colR <- ifelse(plotData$change>=0, plotData$colvalue, 0)

plot(1,1,xlim=c(-zoomC1,zoomC1), ylim=c(-zoomC*(0.5*mirrorPlot),minY*1.5),
     ylab=yAxisTitle,xlab=xAxisTitle,yaxt="n",xaxt="n",yaxs="i",xaxs ="i") # added *2 to fit inverted plot
axis(2, at = ylabel,labels = y2label, las = 2)
axis(1, at = xlabel,labels = xlabel, las = 1)
palette(paste0(rbPal1(shades*ratioSBR ),"FF"))
points(((plotData$V1-0.5)*resKB+(plotData$V2-plotData$V1+offsetf)*resKB*offset)-zoomC,
       ((plotData$V2)*resKB-(plotData$V1)*resKB)-zoomC,
       pch=18,
       cex=sz,
       col=shades*plotData$colR)
palette(paste0(rbPal(shades*ratioSBR ),"FF"))
points(((plotData$V1-0.5)*resKB+(plotData$V2-plotData$V1+offsetf)*resKB*offset)-zoomC,
       ((plotData$V2)*resKB-(plotData$V1)*resKB)-zoomC,
       pch=18,
       cex=sz,
       col=shades*plotData$colB)
title(main=plotTitle,line = -plotTitleOffset)
if(arrowYes == 1){
  arrows(minarrowmax,arrowposx,minarrowmin,arrowposx,col="black",lwd=1,length=.025,code = 3)
  arrows(maxarrowmin,arrowposx,maxarrowmax,arrowposx,col="black",lwd=1,length=.025,code = 3)
}

}
#' Load a Matrix
#'
#' This function loads a file as a matrix. It assumes that the first column
#' contains the rownames and the subsequent columns are the sample identifiers.
#' Any rows with duplicated row names will be dropped with the first one being
#' kepted.
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export
HiC.Plot <- function(plotData,plotTitle,arrowYes=0,offsetf=1) {
  
  palette(paste0(colfunc(r*log2(maxC)),"FF"))
  plot(1,1,xlim=c(-zoomC1,zoomC1), ylim=c(-zoomC*(0.5*mirrorPlot),minY*1.5),
       ylab=yAxisTitle,xlab=xAxisTitle,yaxt="n",xaxt="n",yaxs="i",xaxs ="i") # added *2 to fit inverted plot
  axis(2, at = ylabel,labels = y2label, las = 2)
  axis(1, at = xlabel,labels = xlabel, las = 1)
  points(((plotData$V1-0.5)*resKB+(plotData$V2-plotData$V1+offsetf)*resKB*offset)-zoomC,
         ((plotData$V2)*resKB-(plotData$V1)*resKB)-zoomC,
         pch=18,
         cex=sz,col=r*log2(plotData$V3))
  #points((HiCPlot[[i]][,1]-minbin+0.5)*1+(HiCPlot[[i]][,2]-HiCPlot[[i]][,1]+0.5)*1*offset,-((HiCPlot[[i]][,2]-minbin+0.5)*1-(HiCPlot[[i]][,1]-minbin+0.5)*1*1),pch=18,cex=sz,col=r*log2(HiCPlot[[i]][,3]), xlim=c(0,maxs), ylim=c(0,maxs))
  # arrows(0,0-(zoomC+1),0,0-(zoomC+1),col="black",lwd=2,length=.1) #Add CENtromere
  # p = HiCPlot[[2]]
  title(main=plotTitle,line = -plotTitleOffset)
  
  
  if(arrowYes == 1){
    arrows(minarrowmax,arrowposx,minarrowmin,arrowposx,col="black",lwd=1,length=.025,code = 3)
    arrows(maxarrowmin,arrowposx,maxarrowmax,arrowposx,col="black",lwd=1,length=.025,code = 3)
  }
  
}
#' Load a Matrix
#'
#' This function loads a file as a matrix. It assumes that the first column
#' contains the rownames and the subsequent columns are the sample identifiers.
#' Any rows with duplicated row names will be dropped with the first one being
#' kepted.
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export
ratio.color.bar <- function(lut,
                            min,nticks,max=-min,
                            ticks=seq(min, 0, len=nticks), title='') {
  scale = (length(lut)-1)/(max-min)
  tes <- 2^ticks
  p = length(tes)
  tes[p] = 0
  # tes2 = tes/2^4
  plot(c(5,5), c(min,max), pch=15,cex=sz*1, type='n', bty='n', xlim=c(0,10), xaxt='n', xlab='', yaxt='n', ylab="Folds of Change Scalebar", main=title)
  axis(2, ticks,tes ,las=1)  #red scalebar
  axis(2, -ticks,tes ,las=1) #blue scalebar
  for (i in 1:(length(lut)-1)) {
    y = (i-1)/scale + min
    rect(1,y,9,y+1/scale, col=lut[i], border=lut[i])
  }
}
#' Load a Matrix
#'
#' This function loads a file as a matrix. It assumes that the first column
#' contains the rownames and the subsequent columns are the sample identifiers.
#' Any rows with duplicated row names will be dropped with the first one being
#' kepted.
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export
color.bar <- function(C,sz) {
  #New scale bar plot:
  scalebar=1:C
  scalebar=2^(seq(log2(1), log2(C), length.out = 50))
  scalebar2=tail(scalebar,-1)
  scalebar=head(scalebar,-1)
  #plot:
  plot(5,5,pch=15,cex=sz*1, type="n",las=2, log="y", xlim=c(0,10), ylim=c(1,C), xaxt="n",xlab=NA, ylab="Interaction freq per 10 million pairs",at=c(1,2,5,10,20,50,100,200,500,1000,2000,5000,10000))
  
  rect(rep(1, length(scalebar)),scalebar,
       rep(9, length(scalebar)),scalebar2,pch=15,cex=sz*1, col=r*log2(scalebar),border = r*log2(scalebar))
  
}
#' Load a Matrix
#'
#' This function loads a file as a matrix. It assumes that the first column
#' contains the rownames and the subsequent columns are the sample identifiers.
#' Any rows with duplicated row names will be dropped with the first one being
#' kepted.
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export
ave.mean.median <- function(aveType,aveLArg,rc,pT,arrowsYes=0,offsetf=1, noReflect=1){
  
  if (noReflect != 1){
    aveLArg[[1]]  = ave.filter(list(aveLArg[[1]],inverse.matrix(aveLArg[[1]])),aveType)
    aveLArg[[2]]  = ave.filter(list(aveLArg[[2]],inverse.matrix(aveLArg[[2]])),aveType)
  }
  HiC.Plot(matrix.sparse(aveLArg[[1]]),pT[1],arrowsYes,offsetf)
  HiC.Plot(matrix.sparse(aveLArg[[2]]),pT[2],arrowsYes,offsetf)
  color.bar(maxC,sz)
  
  
  
  HiCRatio.Plot(ratioF(aveLArg[[1]],aveLArg[[2]],rc),pT[3],arrowsYes,offsetf)
  
  ratio.color.bar(colorRampPalette(c("red","white","blue"))(shades*2), ratioSBR,round(ratioSBR+1,0))
}
#' Load a Matrix
#'
#' This function loads a file as a matrix. It assumes that the first column
#' contains the rownames and the subsequent columns are the sample identifiers.
#' Any rows with duplicated row names will be dropped with the first one being
#' kepted.
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export
#' @import plyr
ave.filter <-function(input,aveType){
  
  t1 = Filter(Negate(is.null), input)
  output = aaply(laply(t1, as.matrix), c(2, 3), aveType,na.rm=TRUE)
  return (output)
}
#' Load a Matrix
#'
#' This function loads a file as a matrix. It assumes that the first column
#' contains the rownames and the subsequent columns are the sample identifiers.
#' Any rows with duplicated row names will be dropped with the first one being
#' kepted.
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export
#' @import Matrix
matrix.sparse <- function (input){
  i <- Matrix::Matrix(as.matrix(input), sparse = TRUE) # Converts dataframe back to a matrix and converts this to a sparseMatrix format
  output = summary(i) #Expands back to a list

   colnames(output)= c("V1","V2","V3")
  return(output)
}
#' Load a Matrix
#'
#' This function loads a file as a matrix. It assumes that the first column
#' contains the rownames and the subsequent columns are the sample identifiers.
#' Any rows with duplicated row names will be dropped with the first one being
#' kepted.
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export

matrixE <- function(n,input,pres,minbinL,dimZ) {
  t <- sparseMatrix(i=input[,1]+pres-minbinL+1, j=input[,2]+pres-minbinL+1, x=input[,3],symmetric=T,dims=c(dimZ, dimZ)-1)
  o = as.data.frame(as.matrix(t))
  o[ o  ==0] = NA
  
  
  kPlot <- matrix.sparse(o)
  
  
  
  
  output=list(o,kPlot)
  return(output)
}
#' Load a Matrix
#'
#' This function loads a file as a matrix. It assumes that the first column
#' contains the rownames and the subsequent columns are the sample identifiers.
#' Any rows with duplicated row names will be dropped with the first one being
#' kepted.
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export

ratioF <- function(input1,input2,rc){
  input1[is.na(input1)] = 0
  input2[is.na(input2)] = 0
  input1 = input1 + rc
  
  input2 = input2 + rc
  ratio = input1/input2
  # ratio[ ratio  ==NA] = 2
  # # is.na(ratio)
  # ratio[is.na(ratio)] = 2 # Remove all NaNs
  
  
  #Recompress sparsematrix:
  
  
  k <- matrix.sparse(ratio)
  
  return (k)
}
#' Load a Matrix
#'
#' This function loads a file as a matrix. It assumes that the first column
#' contains the rownames and the subsequent columns are the sample identifiers.
#' Any rows with duplicated row names will be dropped with the first one being
#' kepted.
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export
workspace.folder <- function(inputStr, toRemove) {
  toRemove=c("completed","dev","centromere","proteincolocalDBS","proteinDBS")
  d <- unlist(strsplit(inputStr, "/"))
  paste(d[!d %in% toRemove], collapse = "/")
}
#' Input a sparseMatrix dataframe. 
#' Outputs a list of 3. 1. Averaged matrix data frame. 2. Averaged sparse data frame. 3. full matrix dataframe
#' This function first converts each element of the inputed list into a full matrix
#' The function then averages the full matrix using the ave.filter function
#' The function then converts the full matrix back to a sparse matrix 
#' All the data is outputed as a list
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export
#' @import plyr
matrixMultiE <- function(n,input,pres,minbinL,dimZ,averg,BSlist) {
  lo = list()
  for (j in 1:length(BSlist$V1)){
    t <- sparseMatrix(i=input[[j]]$V1+pres[[j]]-minbinL[[j]]+1, j=input[[j]]$V2+pres[[j]]-minbinL[[j]]+1, x=input[[j]]$V3,symmetric=T,dims=c(dimZ, dimZ)-1)
    
    o = as.data.frame(as.matrix(t))
    o[ o  ==0] = NA
    lo[[j]] = o 
    
  }
  c = o
  if (j > 1){
    c = ave.filter(lo,averg)
  }
  
  kPlot <- matrix.sparse(c)
  output=list(c,kPlot,lo)
  return(output)
}

#' Load a Matrix
#'
#' This function loads a file as a matrix. It assumes that the first column
#' contains the rownames and the subsequent columns are the sample identifiers.
#' Any rows with duplicated row names will be dropped with the first one being
#' kepted.
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export
table.create <- function(input1,n,input2=NULL,coLRange=500,noII=1,randTrue=0,seed){
  
  output = subset(input1,V1 ==n)
  if (randTrue == "1"){
    set.seed(seed)
    output$V2=sort(round(runif(length(output$V2),min=0,max=chrSize[n])))
    
  }
  if (!is.null(input2)){
  input2 = subset(input2,V1 ==n)
  output$V3 = mapply(testf,output$V2,MoreArgs = list(input2$V2,coLRange),SIMPLIFY=FALSE)
  if (noII == 1){output= output[which(output$V3 ==TRUE),]}
  if (noII == 0){output= output[which(output$V3 ==FALSE),]}
  output <- output[ -3 ]  #drops col 3
}
  if (length(output$V2)>1){
  output$V3=filter(output$V2,c(1,-1))#reports the difference between rows
  output= output[-length(output$V1),]#removes last row of the collumn, not needed as the distance of loop is calc from the row before 
  output$V4 =  round((output$V3/2)+output$V2, digits = 0)
  }else{
    output$V3=0
    output$V4=0
    }

  return (output)
  
  
}

#' Flattens list elements
#'
#' removes the chromosome origin from a list.
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export
combine.list <-function(input,listpo = 3){
  #combines each chromosome loop pile up into 1 list
  a = length(input)
  output = list()

  input<-input[1:16]#fills list with nulls to 16
  iv = c(input[[1]][[listpo]],input[[2]][[listpo]],input[[3]][[listpo]],input[[4]][[listpo]],input[[5]][[listpo]],input[[6]][[listpo]],input[[7]][[listpo]],input[[8]][[listpo]],input[[9]][[listpo]],input[[10]][[listpo]],input[[11]][[listpo]],input[[12]][[listpo]],input[[13]][[listpo]],input[[14]][[listpo]],input[[15]][[listpo]],input[[16]][[listpo]])

  
  return (iv)
}
#' Load a Matrix
#'
#' This function loads a file as a matrix. It assumes that the first column
#' contains the rownames and the subsequent columns are the sample identifiers.
#' Any rows with duplicated row names will be dropped with the first one being
#' kepted.
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export
testf <-function(a,b,coR){
  for (j in b){
    # max = a - tempC
    # min = tempC -a
    output = findInterval(a,c(j-coR,j+coR)) == 1
    if (output == TRUE) {return(output)}
    
  }
  # if (!is.null(max)){
  #   if (max<=coLR){
  #     output = 1
  #   }
  #   if (max<=coLR){
  #     output = 1
  #   }}
  
  return (FALSE)
}
#' Load a Matrix
#'
#' This function loads a file as a matrix. It assumes that the first column
#' contains the rownames and the subsequent columns are the sample identifiers.
#' Any rows with duplicated row names will be dropped with the first one being
#' kepted.
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export
data.combine <-function(n,input,HiCbed,zoomC,resKB,HiC){
ti1 = list()
ti2 = list()

spres=list()
sminbinL=list()
d = length(input)
if (d >0) {
  
  
  for (j in 1:length(input))
  {
    
    #Subset data for chromosome of interest
    Dbed1=subset(HiCbed, V1==n)
    #subset data for region around centromere
    Dbed1 = subset(Dbed1, V2 %in% (input[j]-(zoomC*1000)):(input[j]+(zoomC*1000)) & V3 %in% (input[j]-(zoomC*1000)):(input[j]+(zoomC*1000)))
    minbin=min(Dbed1$V4)
    maxbin=max(Dbed1$V4)
    
    # # for (g in 2:3){ Dbed1[,g]= Dbed1[,g]} 
    # maxs=(maxbin-minbin)*resKB #max x and y value to plot based on resolution selected
    p = zoomC - input[j]/1000
    if (p<0){p=0}
    spres[[j]] = p/resKB
    sminbinL[[j]] = minbin
    ####    ####    ####    ####    ####    ####    ####    ####    ####    ####    ####    ####
    
    ti1[[j]] = subset(HiC[[1]], V1>=minbin & V1 <=maxbin & V2>=minbin & V2 <=maxbin) #breaks the data per chromosome and stores ina list
    ti2[[j]] = subset(HiC[[2]], V1>=minbin & V1 <=maxbin & V2>=minbin & V2 <=maxbin) 
  }
  

  output = list(ti1,ti2,spres,sminbinL)
} else {

  
  output = list(NULL,NULL,1,1)
  # chrNR  <- chrN [ chrN != n ]
  
  # chrNR = chrN[-n]

}


# HiClist1[[n]]=ti1
# HiClist2[[n]]=ti2
# pres[[n]] = spres
# minbinL[[n]]=sminbinL
#
return (output)
}
#' Load a Matrix
#'
#' This function loads a file as a matrix. It assumes that the first column
#' contains the rownames and the subsequent columns are the sample identifiers.
#' Any rows with duplicated row names will be dropped with the first one being
#' kepted.
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export
filename.shortcut <-function(input){
  if (input == "spo11"){return ("HiC_SSY12_ndt80Dspo11Y135F_1B2_8h_down5")}
if (input == "wt"){return ("HiC_SSY14_ndt80D_2A2_8h_down5")}
if (input == "rec8"){return ("HiC_SSY20_ndt80Drec8D_1A2_8h_down5")}
if (input == "zip1"){return ("HiC_SSY25_ndt80Dzip1D_2A_8h_down5")}
if (input == "hop1"){return ("HiC_SSY49_ndt80Dhop1D_1A_8h_down5")}

}
#' Load a Matrix
#'
#' This function loads a file as a matrix. It assumes that the first column
#' contains the rownames and the subsequent columns are the sample identifiers.
#' Any rows with duplicated row names will be dropped with the first one being
#' kepted.
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export
read.iced <-function(filename,binSize=10000){
  output=read.delim(paste0("matrix/",filename,"/new_sizes/iced/",binSize,"/",filename,"_",binSize,"_iced.matrix"), header=FALSE)
  return(output)
}
#' Load a Matrix
#'
#' This function loads a file as a matrix. It assumes that the first column
#' contains the rownames and the subsequent columns are the sample identifiers.
#' Any rows with duplicated row names will be dropped with the first one being
#' kepted.
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export
chip.selector <-function(input1,input2 = NULL,randTrue=0,seed=453){
  readsvec <- c()
  proteinOutput = list()
  for (s in chrN){
    
    proteinOutput[[s]] =table.create(input1=input1,n=s,input2=input2,coLRange=coLR,noII=noInverseInverse,randTrue=randTrue,seed=seed)
    
    
    proteinOutput[[s]] = subset(proteinOutput[[s]], V3 %in% (loopSMin:loopSMax))
    
    
    if (length(proteinOutput[[s]]$V1)>0){
      
      
      
      readsvec[s] = length(proteinOutput[[s]]$V1)
    }else{
      readsvec[s] = 0
      proteinOutput[[s]] = data.frame(V1=character())
    }
    
  }
  readsvec=ifelse(is.na(readsvec),0,readsvec)
  output = list(proteinOutput,readsvec)
  return (output)
}
#' Load a Matrix
#'
#' This function loads a file as a matrix. It assumes that the first column
#' contains the rownames and the subsequent columns are the sample identifiers.
#' Any rows with duplicated row names will be dropped with the first one being
#' kepted.
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export
inverse.matrix <- function(input){
  
  output = input[c(nrow(input):1),c(nrow(input):1),drop = FALSE] 
  return (output)
}
#' Load a Matrix
#'
#' This function loads a file as a matrix. It assumes that the first column
#' contains the rownames and the subsequent columns are the sample identifiers.
#' Any rows with duplicated row names will be dropped with the first one being
#' kepted.
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export
main.subsetter <-function(input,midpoint = 1,chrN,HiCbed,zoomC,resKB,HiC){
  output = list()
  minbinL = list()
  pres = list()
  for (n in chrN){
    ti1 = list()
    
    spres=list()
    sminbinL=list()
    d = length(input[[n]]$V3)
    if (d >0) {
      
      
      for (j in 1:length(input[[n]]$V4))
      {
        
        #Subset data for chromosome of interest
        Dbed1=subset(HiCbed, V1==n)
        #subset data for region around centromere
        if (midpoint == 0){v=input[[n]]$V2[j]}else{v=input[[n]]$V4[j]}
        Dbed1 = subset(Dbed1, V2 %in% (v-(zoomC*1000)):(v+(zoomC*1000)) & V3 %in% (v-(zoomC*1000)):(v+(zoomC*1000)))
        minbin=min(Dbed1$V4)
        maxbin=max(Dbed1$V4)
        
        
        p = zoomC - v/1000
        if (p<0){p=0}
        spres[[j]] = p/resKB
        sminbinL[[j]] = minbin
        ####    ####    ####    ####    ####    ####    ####    ####    ####    ####    ####    ####
        
        ti1[[j]] = subset(HiC, V1>=minbin & V1 <=maxbin & V2>=minbin & V2 <=maxbin) #breaks the data per chromosome and stores ina list
        
      }
      
      output[[n]] =ti1
      pres[[n]] <- spres
      minbinL[[n]]<-sminbinL
    } else {
      output[[n]]=NULL
      pres[[n]] <- 1
      minbinL[[n]]<-1
    }
  }
  
  
  return (list(output,minbinL,pres))
}
#' Load a Matrix
#'
#' This function loads a file as a matrix. It assumes that the first column
#' contains the rownames and the subsequent columns are the sample identifiers.
#' Any rows with duplicated row names will be dropped with the first one being
#' kepted.
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export
matrixMultiEt <- function(n,input,pres,minbinL,dimZ,averg,BSlist,noReflect=1,aveOnly=0) {
  lo = list()
  for (j in 1:length(BSlist$V1)){
    t <- sparseMatrix(j=input[[j]]$V1+pres[[j]]-minbinL[[j]]+1, i=input[[j]]$V2+pres[[j]]-minbinL[[j]]+1, x=input[[j]]$V3,symmetric=T,dims=c(dimZ, dimZ))
    o = as.data.frame(symmpart(as.matrix(t)))
    o[ o  ==0] = NA
    
    o = o[-nrow(o),-nrow(o)]
    lo[[j]] = o 
    
  }
  if (aveOnly != 1){
  c = o
  if (j > 1){
    c = ave.filter(lo,averg)
  }
  if (noReflect != 1){c = ave.filter(list(inverse.matrix(c),c),averg)}
  kPlot <- matrix.sparse(c)
  }else{
    kPlot = 1
    c = 1
    }
  output=list(c,kPlot,lo)
  return(output)
}
gb305/Rpackages documentation built on June 13, 2018, 10:08 a.m.