Rscript/local_pairwised.R

setwd("/Users/wangchao/Development/microarray1433/Rscript")
orig_data <- read.table("../data/array.dat",header=T,colClasses=c("integer","character","character","numeric","integer","factor"))
library(Biostrings)
data(BLOSUM62)
library(reshape2)

#normorlised BLOSUM62
matrix_transform <- function(x){
	output <- input <- x[-(nrow(x)-4):-nrow(x),-(ncol(x)-4):-ncol(x)]
	for (i in 1:nrow(input)) {
		for (j in 1:ncol(input)){
			output[i,j] = (input[i,j]/sqrt(input[i,i]*input[j,j])+1)/2
		}
	}
	return(output)
}

nBLOSUM <- matrix_transform(BLOSUM62)

aff.dist <- function(x){
	dist <- log2(max(x[1],x[2])/min(x[1],x[2]))
	return(dist)
}

seq.dist <- function(x) {
	seq <- gsub(pattern = "XXX|pS/T",replacement = "", x)
	seq.mat <- strsplit(seq, "")
	a <- nBLOSUM[seq.mat[[1]][1],seq.mat[[2]][1]]
	b <- nBLOSUM[seq.mat[[1]][2],seq.mat[[2]][2]]
	c <- nBLOSUM[seq.mat[[1]][3],seq.mat[[2]][3]]
	single <- matrix(c( 1-a, 1-b, 1-c, sqrt((1-a) * (1-b)), sqrt((1-a) * (1-c)), sqrt((1-b) * (1-c)), ((1-a) * (1-b) * (1-c))^(1/3)),byrow=T,nrow=1)
	return(single)
}

value = 100
cal.lm <- function(row, m) {
	seq.list <- m[(row-value):(row+value), 3] # get the peptide sequence
	affinity.list <- log2(m[(row-value) : (row+value), 4]+1) #get the affinity value
	seq.mat <- matrix(c(seq.list,rep(seq.list[value + 1], length(seq.list))), ncol = 2)
	seq.input <- t(apply(seq.mat, 1, seq.dist))
	affinity.mat <- matrix(c(affinity.list, rep(affinity.list[value + 1], length(affinity.list))), ncol = 2)
	affinity.mat <- affinity.mat + 1 
	affinity.input <- t(apply(affinity.mat, 1, aff.dist))
	seq.input <- seq.input[-(value + 1),]
	affinity.input <- affinity.input[1,-(value + 1)]
	output <- lm(affinity.input ~ seq.input, x = T, y = T)
	result <- mean(c((affinity.list[1:value] - output$fitted.values[1:value]),(output$fitted.values[(value + 1):(value + value)] + affinity.list[(value + 1):(value + value)])))
	return(result)
}

mainfun <- function(isoforms) {
	two.sides <- orig_data[orig_data$Isoforms == isoforms,]
	left.side <- two.sides[grep("pS/TXXX",two.sides$Sequence),]
	right.side <- two.sides[grep("XXXpS/T",two.sides$Sequence),]

	left.side <- left.side[order(left.side$RANK),]
	right.side <- right.side[order(right.side$RANK),]

	left.nrow <- (value + 1) : (nrow(left.side) - value)
	left.exp <- lapply(left.nrow, cal.lm, left.side)
	left.exp <-do.call(c, left.exp)

	right.nrow <- (value + 1) : (nrow(right.side) - value)
	right.exp <- lapply(right.nrow, cal.lm, right.side)
	right.exp <- do.call(c, right.exp)
	biaoti <- paste("LEFT", isoforms,sep=" ")
	plot(left.exp, log2(left.side[left.nrow, 4] + 1), pty = '.', type = 'p', cex = 0.4, pch = 20, col = 'gray', main = biaoti, ylab = "Real", xlab = 'Mean')
	biaoti <- paste("RIGHT", isoforms,sep=" ")
	plot(right.exp, log2(right.side[right.nrow, 4] + 1), pty = '.', type = 'p', cex = 0.4, pch = 20, col = 'gray', main = biaoti, ylab = "Real", xlab = 'Mean')
}

layout(matrix(1:14, byrow = F, nrow = 2))
lapply(levels(orig_data$Isoforms), mainfun)
Solipugids/microarray1433 documentation built on June 21, 2019, 12:36 a.m.