Rscript/cor.R

#setwd("/Users/wangchao/Development/microarray1433/Rscript")
setwd("C:/Users/chaowang/Documents/GitHub/microarray1433/Rscript")
#read file
orig_data <- read.table("../data/array.dat",header=T,colClasses=c("integer","character","character","numeric","integer","factor"))
library(Biostrings)
data(BLOSUM62)
library(reshape2)

#construct functions

affinity <- function(affinity) {
	affinity$RF = affinity$RF + 1
	pairwise.value <- expand.grid(affinity$RF, affinity$RF)
	pairwise.id <- expand.grid(affinity$ID, affinity$ID)
	pairwise.id$Var3 <- mapply(function(x,y)log2(max(x,y)/min(x,y)), pairwise.value$Var1, pairwise.value$Var2)
	return(pairwise.id$Var3)
}

seq.function <- function(x,y) {
	m <- min(BLOSUM62[x,x], BLOSUM62[y,y]) - BLOSUM62[x,y]
	return(m)
}

seq.dist <- function(x,y) {
	seq <- gsub(pattern = "XXX|pS/T",replacement = "", c(x,y))
	seq.mat <- strsplit(seq, "")
	a <- seq.function(seq.mat[[1]][1],seq.mat[[2]][1])
	b <- seq.function(seq.mat[[1]][2],seq.mat[[2]][2])
	c <- seq.function(seq.mat[[1]][3],seq.mat[[2]][3])
	distance <- sqrt((a^2 + b^2 + c^2)/3)
	return(distance)
}

sequence <-  function(sequence) {
	pairwise.seq <- expand.grid(sequence$Sequence, sequence$Sequence, stringsAsFactors=F)
	pairwise.id <- expand.grid(sequence$ID, sequence$ID)
	pairwise.id$Var3 <- mapply(seq.dist, pairwise.seq$Var1, pairwise.seq$Var2)
	return(pairwise.id$Var3)
}

#main scripts
#Calculate sequence distance
two.sides <- orig_data[orig_data$Isoforms == "Beta",]	
left.side <- two.sides[grep("pS/TXXX",two.sides$Sequence),]
right.side <- two.sides[grep("XXXpS/T",two.sides$Sequence),]
cat("Calculating pairwise sequence distance at left side\n", sep=" ")
left_sequnces.mat <- sequence(left.side)
cat("Calculating pairwise sequence distance at right side\n", sep=" ")
right_sequences.mat <- sequence(right.side)

#Calculate affinity distance for each isoform
for (isoform in levels(orig_data$Isoforms)){	
	dir.create(paste("../output/", isoform, sep = ""),showWarnings = F)
	two.sides <- orig_data[orig_data$Isoforms == isoform,]	
	left.side <- two.sides[grep("pS/TXXX",two.sides$Sequence),]
	right.side <- two.sides[grep("XXXpS/T",two.sides$Sequence),]

	##left.side

	cat("Calculating pairwise affinity at left side for", isoform, "\n", sep=" ")
	left_affinity.mat <- affinity(left.side)
	left_cor <- cor(left_affinity.mat, left_sequnces.mat)
	##right.side
	
	cat("Calculating pairwise affinity at right side for", isoform, "\n", sep=" ")
	right_affinity.mat <- affinity(right.side)
	right_cor <- cor(right_affinity.mat, right_sequences.mat)
	cat("Left", left_cor, "Right", right_cor, "\n", sep = "\t")
}
Solipugids/microarray1433 documentation built on June 21, 2019, 12:36 a.m.