funb.r
In gb305/Rpackages:

#' Load a Matrix
#'
#' This function loads a file as a matrix. It assumes that the first column
#' contains the rownames and the subsequent columns are the sample identifiers.
#' Any rows with duplicated row names will be dropped with the first one being
#' kepted.
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export
HiCRatio.Plot <- function(plotData,plotTitle,arrowYes=0) {
  zoomC1 = zoomC/2
  plotData$change <- log2(plotData[,3])
plotData$colvalue <- ifelse(plotData$change<0,-plotData$change, plotData$change)
rbPal <- colorRampPalette(c("white","blue"))
rbPal1 <- colorRampPalette(c("white","red"))
plotData$colvalue <- ifelse(plotData$colvalue<=ratioSBR, plotData$colvalue, ratioSBR)  #makes color values above the max = to the max range
plotData$colB <- ifelse(plotData$change<=0, plotData$colvalue, 0)
plotData$colR <- ifelse(plotData$change>=0, plotData$colvalue, 0)

plot(1,1,xlim=c(-zoomC1,zoomC1), ylim=c(-zoomC*(0.5*mirrorPlot),minY*1.5),
     ylab=yAxisTitle,xlab=xAxisTitle,yaxt="n",xaxt="n",yaxs="i",xaxs ="i") # added *2 to fit inverted plot
axis(2, at = ylabel,labels = y2label, las = 2)
axis(1, at = xlabel,labels = xlabel, las = 1)
palette(paste0(rbPal1(shades*ratioSBR ),"FF"))
points(((plotData$V1-0.5)*resKB+(plotData$V2-plotData$V1+1)*resKB*offset)-zoomC,
       ((plotData$V2)*resKB-(plotData$V1)*resKB)-zoomC,
       pch=18,
       cex=sz,
       col=shades*plotData$colR)
palette(paste0(rbPal(shades*ratioSBR ),"FF"))
points(((plotData$V1-0.5)*resKB+(plotData$V2-plotData$V1+1)*resKB*offset)-zoomC,
       ((plotData$V2)*resKB-(plotData$V1)*resKB)-zoomC,
       pch=18,
       cex=sz,
       col=shades*plotData$colB)
title(main=plotTitle,line = -plotTitleOffset)
if(arrowYes == 1){
  arrows(minarrowmax,arrowposx,minarrowmin,arrowposx,col="black",lwd=1,length=.025,code = 3)
  arrows(maxarrowmin,arrowposx,maxarrowmax,arrowposx,col="black",lwd=1,length=.025,code = 3)
}

}
#' Load a Matrix
#'
#' This function loads a file as a matrix. It assumes that the first column
#' contains the rownames and the subsequent columns are the sample identifiers.
#' Any rows with duplicated row names will be dropped with the first one being
#' kepted.
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export
HiC.Plot <- function(plotData,plotTitle,arrowYes=0) {
  
  palette(paste0(colfunc(r*log2(maxC)),"FF"))
  plot(1,1,xlim=c(-zoomC1,zoomC1), ylim=c(-zoomC*(0.5*mirrorPlot),minY*1.5),
       ylab=yAxisTitle,xlab=xAxisTitle,yaxt="n",xaxt="n",yaxs="i",xaxs ="i") # added *2 to fit inverted plot
  axis(2, at = ylabel,labels = y2label, las = 2)
  axis(1, at = xlabel,labels = xlabel, las = 1)
  points(((plotData$V1-0.5)*resKB+(plotData$V2-plotData$V1+1)*resKB*offset)-zoomC,
         ((plotData$V2)*resKB-(plotData$V1)*resKB)-zoomC,
         pch=18,
         cex=sz,col=r*log2(plotData$V3))
  #points((HiCPlot[[i]][,1]-minbin+0.5)*1+(HiCPlot[[i]][,2]-HiCPlot[[i]][,1]+0.5)*1*offset,-((HiCPlot[[i]][,2]-minbin+0.5)*1-(HiCPlot[[i]][,1]-minbin+0.5)*1*1),pch=18,cex=sz,col=r*log2(HiCPlot[[i]][,3]), xlim=c(0,maxs), ylim=c(0,maxs))
  # arrows(0,0-(zoomC+1),0,0-(zoomC+1),col="black",lwd=2,length=.1) #Add CENtromere
  # p = HiCPlot[[2]]
  title(main=plotTitle,line = -plotTitleOffset)
  
  
  if(arrowYes == 1){
    arrows(minarrowmax,arrowposx,minarrowmin,arrowposx,col="black",lwd=1,length=.025,code = 3)
    arrows(maxarrowmin,arrowposx,maxarrowmax,arrowposx,col="black",lwd=1,length=.025,code = 3)
  }
  
}
#' Load a Matrix
#'
#' This function loads a file as a matrix. It assumes that the first column
#' contains the rownames and the subsequent columns are the sample identifiers.
#' Any rows with duplicated row names will be dropped with the first one being
#' kepted.
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export
ratio.color.bar <- function(lut,
                            min,nticks,max=-min,
                            ticks=seq(min, 0, len=nticks), title='') {
  scale = (length(lut)-1)/(max-min)
  tes <- 2^ticks
  p = length(tes)
  tes[p] = 0
  # tes2 = tes/2^4
  plot(c(5,5), c(min,max), pch=15,cex=sz*1, type='n', bty='n', xlim=c(0,10), xaxt='n', xlab='', yaxt='n', ylab="Folds of Change Scalebar", main=title)
  axis(2, ticks,tes ,las=1)  #red scalebar
  axis(2, -ticks,tes ,las=1) #blue scalebar
  for (i in 1:(length(lut)-1)) {
    y = (i-1)/scale + min
    rect(1,y,9,y+1/scale, col=lut[i], border=lut[i])
  }
}
#' Load a Matrix
#'
#' This function loads a file as a matrix. It assumes that the first column
#' contains the rownames and the subsequent columns are the sample identifiers.
#' Any rows with duplicated row names will be dropped with the first one being
#' kepted.
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export
color.bar <- function(C,sz) {
  #New scale bar plot:
  scalebar=1:C
  scalebar=2^(seq(log2(1), log2(C), length.out = 50))
  scalebar2=tail(scalebar,-1)
  scalebar=head(scalebar,-1)
  #plot:
  plot(5,5,pch=15,cex=sz*1, type="n",las=2, log="y", xlim=c(0,10), ylim=c(1,C), xaxt="n",xlab=NA, ylab="Interaction freq per 10 million pairs",at=c(1,2,5,10,20,50,100,200,500,1000,2000,5000,10000))
  
  rect(rep(1, length(scalebar)),scalebar,
       rep(9, length(scalebar)),scalebar2,pch=15,cex=sz*1, col=r*log2(scalebar),border = r*log2(scalebar))
  
}
#' Load a Matrix
#'
#' This function loads a file as a matrix. It assumes that the first column
#' contains the rownames and the subsequent columns are the sample identifiers.
#' Any rows with duplicated row names will be dropped with the first one being
#' kepted.
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export
ave.mean.median <- function(aveType,aveLArg,rc,pT,arrowsYes=0){

  
  
  tempS1 <- matrix.sparse(aveLArg[[1]])
  
  tempS2<- matrix.sparse(aveLArg[[2]])
  

  HiC.Plot(tempS1,pT[1],arrowsYes)
  HiC.Plot(tempS2,pT[2],arrowsYes)
  color.bar(maxC,sz)
  
  
  # ratioave = temp1/temp2
  # 
  # ratioave[is.na(ratioave)] = 1 # Remove all NaNs
  
  
  #Recompress sparsematrix:
  

  
  HiCRatio.Plot(ratioF(aveLArg[[1]],aveLArg[[2]],rc),pT[3],arrowsYes)
  
  ratio.color.bar(colorRampPalette(c("red","white","blue"))(shades*2), ratioSBR,round(ratioSBR+1,0))
}
#' Load a Matrix
#'
#' This function loads a file as a matrix. It assumes that the first column
#' contains the rownames and the subsequent columns are the sample identifiers.
#' Any rows with duplicated row names will be dropped with the first one being
#' kepted.
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export
#' @import plyr
ave.filter <-function(input,aveType){
  
  t1 = Filter(Negate(is.null), input)
  output = aaply(laply(t1, as.matrix), c(2, 3), aveType,na.rm=TRUE)
  return (output)
}
#' Load a Matrix
#'
#' This function loads a file as a matrix. It assumes that the first column
#' contains the rownames and the subsequent columns are the sample identifiers.
#' Any rows with duplicated row names will be dropped with the first one being
#' kepted.
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export
#' @import Matrix
matrix.sparse <- function (input){
  i <- Matrix::Matrix(as.matrix(input), sparse = TRUE) # Converts dataframe back to a matrix and converts this to a sparseMatrix format
  output = summary(i) #Expands back to a list

   colnames(output)= c("V1","V2","V3")
  return(output)
}
#' Load a Matrix
#'
#' This function loads a file as a matrix. It assumes that the first column
#' contains the rownames and the subsequent columns are the sample identifiers.
#' Any rows with duplicated row names will be dropped with the first one being
#' kepted.
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export

matrixE <- function(n,input,pres,minbinL,dimZ) {
  t <- sparseMatrix(i=input[,1]+pres-minbinL+1, j=input[,2]+pres-minbinL+1, x=input[,3],symmetric=T,dims=c(dimZ, dimZ)-1)
  o = as.data.frame(as.matrix(t))
  o[ o  ==0] = NA
  
  
  kPlot <- matrix.sparse(o)
  
  
  
  
  output=list(o,kPlot)
  return(output)
}
#' Load a Matrix
#'
#' This function loads a file as a matrix. It assumes that the first column
#' contains the rownames and the subsequent columns are the sample identifiers.
#' Any rows with duplicated row names will be dropped with the first one being
#' kepted.
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export

ratioF <- function(input1,input2,rc){
  input1[is.na(input1)] = 0
  input2[is.na(input2)] = 0
  input1 = input1 + rc
  
  input2 = input2 + rc
  ratio = input1/input2
  # ratio[ ratio  ==NA] = 2
  # # is.na(ratio)
  # ratio[is.na(ratio)] = 2 # Remove all NaNs
  
  
  #Recompress sparsematrix:
  
  
  k <- matrix.sparse(ratio)
  
  return (k)
}
#' Load a Matrix
#'
#' This function loads a file as a matrix. It assumes that the first column
#' contains the rownames and the subsequent columns are the sample identifiers.
#' Any rows with duplicated row names will be dropped with the first one being
#' kepted.
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export
workspace.folder <- function(inputStr, toRemove) {
  toRemove=c("completed","dev","centromere","proteincolocalDBS","proteinDBS")
  d <- unlist(strsplit(inputStr, "/"))
  paste(d[!d %in% toRemove], collapse = "/")
}
#' Input a sparseMatrix dataframe. 
#' Outputs a list of 3. 1. Averaged matrix data frame. 2. Averaged sparse data frame. 3. full matrix dataframe
#' This function first converts each element of the inputed list into a full matrix
#' The function then averages the full matrix using the ave.filter function
#' The function then converts the full matrix back to a sparse matrix 
#' All the data is outputed as a list
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export
#' @import plyr
matrixMultiE <- function(n,input,pres,minbinL,dimZ,averg,BSlist) {
  lo = list()
  for (j in 1:length(BSlist$V1)){
    t <- sparseMatrix(i=input[[j]]$V1+pres[[j]]-minbinL[[j]]+1, j=input[[j]]$V2+pres[[j]]-minbinL[[j]]+1, x=input[[j]]$V3,symmetric=T,dims=c(dimZ, dimZ)-1)
    
    o = as.data.frame(as.matrix(t))
    o[ o  ==0] = NA
    lo[[j]] = o 
    
  }
  c = o
  if (j > 1){
    c = ave.filter(lo,averg)
  }
  
  kPlot <- matrix.sparse(c)
  output=list(c,kPlot,lo)
  return(output)
}

#' Load a Matrix
#'
#' This function loads a file as a matrix. It assumes that the first column
#' contains the rownames and the subsequent columns are the sample identifiers.
#' Any rows with duplicated row names will be dropped with the first one being
#' kepted.
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export
table.create <- function(input1,n,input2=NULL,coLRange=500){
  output = subset(input1,V1 ==n)
  if (!is.null(input2)){
  input2 = subset(input2,V1 ==n)
  output$V3 = mapply(testf,output$V2,MoreArgs = list(input2$V2,coLRange),SIMPLIFY=FALSE)
  output= output[which(output$V3 ==TRUE),]
  output <- output[ -3 ]  #drops col 3
}
  if (length(output$V2)>1){
  output$V3=filter(output$V2,c(1,-1))#reports the difference between rows
  output= output[-length(output$V1),]#removes last row of the collumn, not needed as the distance of loop is calc from the row before 
  output$V4 =  round((output$V3/2)+output$V2, digits = 0)
  }else{
    output$V3=0
    output$V4=0
    }

  return (output)
  
  
}

#' Flattens list elements
#'
#' removes the chromosome origin from a list.
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export
combine.list <-function(input){
  #combines each chromosome loop pile up into 1 list
  a = length(input)
  output = list()

  input<-input[1:16]#fills list with nulls to 16
  iv = c(input[[1]][[3]],input[[2]][[3]],input[[3]][[3]],input[[4]][[3]],input[[5]][[3]],input[[6]][[3]],input[[7]][[3]],input[[8]][[3]],input[[9]][[3]],input[[10]][[3]],input[[11]][[3]],input[[12]][[3]],input[[13]][[3]],input[[14]][[3]],input[[15]][[3]],input[[16]][[3]])
  g = iv[seq(1, a, by =1)]
  output= do.call(c, list(g))
  
  return (iv)
}
#' Load a Matrix
#'
#' This function loads a file as a matrix. It assumes that the first column
#' contains the rownames and the subsequent columns are the sample identifiers.
#' Any rows with duplicated row names will be dropped with the first one being
#' kepted.
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export
testf <-function(a,b,coR){
  for (j in b){
    # max = a - tempC
    # min = tempC -a
    output = findInterval(a,c(j-coR,j+coR)) == 1
    if (output == TRUE) {return(output)}
    
  }
  # if (!is.null(max)){
  #   if (max<=coLR){
  #     output = 1
  #   }
  #   if (max<=coLR){
  #     output = 1
  #   }}
  
  return (FALSE)
}
#' Load a Matrix
#'
#' This function loads a file as a matrix. It assumes that the first column
#' contains the rownames and the subsequent columns are the sample identifiers.
#' Any rows with duplicated row names will be dropped with the first one being
#' kepted.
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export
data.combine <-function(n,input,HiCbed,zoomC,resKB,HiC){
ti1 = list()
ti2 = list()

spres=list()
sminbinL=list()
d = length(input)
if (d >0) {
  
  
  for (j in 1:length(input))
  {
    
    #Subset data for chromosome of interest
    Dbed1=subset(HiCbed, V1==n)
    #subset data for region around centromere
    Dbed1 = subset(Dbed1, V2 %in% (input[j]-(zoomC*1000)):(input[j]+(zoomC*1000)) & V3 %in% (input[j]-(zoomC*1000)):(input[j]+(zoomC*1000)))
    minbin=min(Dbed1$V4)
    maxbin=max(Dbed1$V4)
    
    # # for (g in 2:3){ Dbed1[,g]= Dbed1[,g]} 
    # maxs=(maxbin-minbin)*resKB #max x and y value to plot based on resolution selected
    p = zoomC - input[j]/1000
    if (p<0){p=0}
    spres[[j]] = p/resKB
    sminbinL[[j]] = minbin
    ####    ####    ####    ####    ####    ####    ####    ####    ####    ####    ####    ####
    
    ti1[[j]] = subset(HiC[[1]], V1>=minbin & V1 <=maxbin & V2>=minbin & V2 <=maxbin) #breaks the data per chromosome and stores ina list
    ti2[[j]] = subset(HiC[[2]], V1>=minbin & V1 <=maxbin & V2>=minbin & V2 <=maxbin) 
  }
  

  output = list(ti1,ti2,spres,sminbinL)
} else {

  
  output = list(NULL,NULL,1,1)
  # chrNR  <- chrN [ chrN != n ]
  
  # chrNR = chrN[-n]

}


# HiClist1[[n]]=ti1
# HiClist2[[n]]=ti2
# pres[[n]] = spres
# minbinL[[n]]=sminbinL
#
return (output)
}
#' Load a Matrix
#'
#' This function loads a file as a matrix. It assumes that the first column
#' contains the rownames and the subsequent columns are the sample identifiers.
#' Any rows with duplicated row names will be dropped with the first one being
#' kepted.
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export
filename.shortcut <-function(input){
  if (input == "spo11"){return ("HiC_SSY12_ndt80Dspo11Y135F_1B2_8h_down5")}
if (input == "wt"){return ("HiC_SSY14_ndt80D_2A2_8h_down5")}
if (input == "rec8"){return ("HiC_SSY20_ndt80Drec8D_1A2_8h_down5")}
if (input == "zip1"){return ("HiC_SSY25_ndt80Dzip1D_2A_8h_down5")}
if (input == "hop1"){return ("HiC_SSY49_ndt80Dhop1D_1A_8h_down5")}

}
#' Load a Matrix
#'
#' This function loads a file as a matrix. It assumes that the first column
#' contains the rownames and the subsequent columns are the sample identifiers.
#' Any rows with duplicated row names will be dropped with the first one being
#' kepted.
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export
read.iced <-function(filename,binSize=10000){
  output=read.delim(paste0("matrix/",filename,"/new_sizes/iced/",binSize,"/",filename,"_",binSize,"_iced.matrix"), header=FALSE)
  return(output)
}