initializeDomainWeights <- function( domain.weights.table ) {
# Iterates over all rows of argument domain.weights.table, and stores in the
# current redis server domain.weights.table[[ current.row, 1 ]].
#
# Args:
# domain.weights.table : A single column table of the domain weights, with
# rownames being the domain IDs.
#
# Returns: NULL
#
lapply( rownames( domain.weights.table ), function( domain.id ) {
redisSet( domain.id, domain.weights.table[[ domain.id, 1 ]] )
})
NULL
}
initializeDomainAnnotations <- function( domain.annotations,
annotation.type = "InterPro" ) {
# Each protein's set of annotated domains is stored as a Redis Set. Key is
# the protein's accession and value the set of domain IDs of the argument
# type 'annotation.type'.
#
# Args:
# domain.annotations : The table of domain annotations as generated by
# function 'retrieveAnnotationsBiomart'.
# annotation.type : The row of argument 'domain.annotations' to read out.
#
# Returns: NULL
#
lapply( colnames( domain.annotations ), function( protein.accession ) {
prot.dom.annos <- domain.annotations[[ annotation.type, protein.accession ]]
for( domain.id in prot.dom.annos ) {
redisSAdd( protein.accession,
charToRaw( as.character( domain.id ) )
)
}
})
NULL
}
pairwiseVectorSpaceModel <- function( protein.accession.a,
protein.accession.b ) {
# Returns the _unsorted_ set union of the domain annotations for both
# argument proteins. Retrieves the two annotation sets from the current redis
# connection. See function 'initializeDomainAnnotations' on how to initialize
# this.
#
# Args:
# protein.accession.a : Accession of first protein
# protein.accession.b : Accession of second protein
#
# Returns: The set _unsorted_ set union of the two argument protein domain
# annotation sets.
#
redisSUnion( protein.accession.a, protein.accession.b )
}
generateDomainArchitectureSpaceVectorRedis <- function( protein.accession,
vector.space.model ) {
# Constructs a numeric vector where each element 'i' is either the domain
# weight of the ith domain in the argument 'vector.space.model' or 0.0 if the
# argument protein is not annotated with domain i. Note: Domains missing a
# domain weight always will have a domain weight of 0.0!
#
# Args:
# protein.accession : The accession of the protein to generate the domain
# architecture space vector for.
# vector.space.model : The set of domain IDs the vector space is composed
# of.
#
# Returns: The numeric vector with the retrieved domain weights or 0.0, if
# the argument vector is not annotated with domain i.
#
as.numeric(
lapply( vector.space.model, function( domain.id ) {
dom.weight <- redisGet( domain.id )
if( redisSIsMember( protein.accession, domain.id ) &&
! is.null(dom.weight)
) {
dom.weight
} else {
0.0
}
})
)
}
pairwiseDistanceKey <- function( protein.accession.a,
protein.accession.b, distance.type="das_dist" ) {
# Generates unique key to store the pairwise DAS distance under.
#
# Args:
# protein.accession.a : Accession of first protein.
# protein.accession.b : Accession of second protein.
#
# Returns: Alphabetically sorted arguments separated by underscore.
#
paste.funk <- function( ... ) paste( ..., sep="_" )
do.call( 'paste.funk',
as.list(
c(
sort( c(protein.accession.a, protein.accession.b) ),
distance.type
)
)
)
}
sortedPairName <- function( protein.accession.a,
protein.accession.b ) {
# Generates unique key to store the pairwise DAS distance under.
#
# Args:
# protein.accession.a : Accession of first protein.
# protein.accession.b : Accession of second protein.
#
# Returns: Alphabetically sorted arguments separated by underscore.
#
paste.funk <- function( ... ) paste( ..., sep="_" )
do.call( 'paste.funk',
as.list(
c(
sort( c(protein.accession.a, protein.accession.b) )
)
)
)
}
pairwiseDomainArchitectureDistanceRedis <- function( protein.accession.a,
protein.accession.b, save.result=TRUE ) {
# Computes the pairwise distance in the Domain Architecture Space (DAS) for
# the two argument proteins. Retrieves required values from current redis
# connection.
#
# Args:
# protein.accession.a : Accession of first protein.
# protein.accession.b : Accession of second protein.
# save.result : Store the computed distance in the DAS in the current redis
# connection using result of function 'pairwiseDistanceKey(
# protein.accession.a, protein.accession.b )'.
#
# Returns: The distance in DAS as computed by function
# 'pairwiseDomainArchitectureDistance'.
#
vsm <- pairwiseVectorSpaceModel( protein.accession.a, protein.accession.b )
das.a <- generateDomainArchitectureSpaceVectorRedis( protein.accession.a, vsm )
das.b <- generateDomainArchitectureSpaceVectorRedis( protein.accession.b, vsm )
# compute 1.0 minus the cosine of the angle between the two vectors:
das.dist <- pairwiseDomainArchitectureDistance( das.a, das.b )
# Save result in redis, if requested:
if( save.result ) {
redisSet(
pairwiseDistanceKey( protein.accession.a, protein.accession.b ),
das.dist
)
}
# return
das.dist
}
partialDomainArchitectureDistancesRedis <- function(
all.accessions, partial.accessions, lapply.funk=lapply,
init.thread.funk=NULL, close.thread.funk=NULL, ... ) {
# Computes all pairwise distances in domain architecture space between
# accessions in argument 'partial.accessions' and 'all.accessions'. Only
# computes distances if they are not yet stored in the current REDIS instance
# and stores it in this case.
#
# Args:
# all.accessions : All amino acid accessions.
# partial.accessions : Accessions to generate all pairs with all.accessions
# for.
# lapply.funk : Set to 'mclapply' if computation in parallel is
# wanted.
# init.thread.funk : If not NULL, this function is invoked at the very
# start of each loop inside (mc)lapply.
# close.thread.funk : If not null, this function is invoked at the very
# end of each loop inside (mc)lapply.
# ... : Additional arguments to be passed to 'lapply.funk'
#
# Returns: NULL
#
lapply.funk( partial.accessions, function( acc.1 ) {
if ( ! is.null(init.thread.funk) )
init.thread.funk()
for ( acc.2 in all.accessions ) {
if ( is.null( redisGet( pairwiseDistanceKey( acc.1, acc.2 ) ) ) ) {
pairwiseDomainArchitectureDistanceRedis(
acc.1, acc.2
)
}
}
if ( ! is.null(close.thread.funk) )
close.thread.funk()
}, ... )
# return
NULL
}
pairwiseSequenceDistanceRedis <- function( aa.seq.pattern,
aa.seq.subject, pattern.accession, subject.accession,
sub.matrix="PAM250", gap.open.pnlty=-10,
gap.extension.pnlty=-0.1, distance.model="Dayhoff",
distance.key.suffix='seq_dist' ) {
# Computes the sequence distance between argument amino acid sequences
# 'aa.seq.pattern' and 'aa.seq.subject' using function
# 'pairwiseSequenceDistance(...)'. Saves the computed sequence distance in
# the current redis instance using the key as returned by function
# pairwiseDistanceKey with suffix as in argument 'distance.key.suffix'.
#
# Args:
# aa.seq.pattern, aa.seq.subject : argument amino acid sequences
# sub.matrix : substitution matrix to use in the global
# alignment
# gap.open.pnlty : score penalty to apply for opening a gap
# in the global alignment
# gap.extension.pnlty : score penalty to apply for extending a
# gap in the global alignment
# distance.model : model to base the sequence distance
# computation on
# pattern.accession : The accession of argument AA-Sequence
# 'aa.seq.pattern'.
# subject.accession : The accession of argument AA-Sequence
# 'aa.seq.subject'.
# distance.key.suffix : The suffix to append to the redis key
# under which the computed sequence
# distance is stored. See function
# pairwiseDistanceKey(...) for details.
#
# Returns: The computed pairwise sequence distance as result of function
# 'pairwiseSequenceDistance(...)'.
#
# Compute Sequence Distance:
seq.dist <- pairwiseSequenceDistance(
as.character( aa.seq.pattern ),
as.character( aa.seq.subject ),
sub.matrix, gap.open.pnlty,
gap.extension.pnlty, distance.model
)
# Save result in current redis connection:
redisSet(
pairwiseDistanceKey( pattern.accession,
subject.accession, distance.type=distance.key.suffix
),
seq.dist
)
# Return computed distance:
seq.dist
}
partialSequenceDistancesRedis <- function( aa.sequences,
partial.accessions,
distance.key.suffix='seq_dist', lapply.funk=lapply,
init.thread.funk=NULL, close.thread.funk=NULL, ... ) {
# Computes pairwise amino acid sequence distances for all pairs of entries in
# argument 'partial.accessions' and argument 'all.accessions'. Distances are
# only computed, if not already stored in the current REDIS instance and are
# stored in this case.
#
# Args:
# aa.sequences : A named list of amino acid sequences.
# partial.accessions : Accessions in argument 'aa.sequences' to compute the
# pairwise distances for.
# distance.key.suffix : The suffix of the key the distance is stored under.
# See function 'pairwiseDistanceKey(...)' for details.
# lapply.funk : Function to use to iterate over all sequences and compute
# distances. Set to mclapply, if parallel computation is requested.
# init.thread.funk : If not NULL, this function is invoked at the very
# start of each loop inside (mc)lapply.
# close.thread.funk : If not null, this function is invoked at the very
# end of each loop inside (mc)lapply.
# ... : Additional arguments to be passed to 'lapply.funk'
#
# Returns: NULL
#
all.accessions <- names( aa.sequences )
lapply.funk ( partial.accessions, function( acc.pattern ) {
if ( ! is.null(init.thread.funk) )
init.thread.funk()
aa.seq.pattern <- aa.sequences[ acc.pattern ]
for ( acc.subject in all.accessions ) {
if ( is.null( redisGet( pairwiseDistanceKey( acc.pattern,
acc.subject, distance.type=distance.key.suffix ) ) )
) {
pairwiseSequenceDistanceRedis( aa.seq.pattern, aa.sequences[ acc.subject ],
acc.pattern, acc.subject
)
}
}
if ( ! is.null(close.thread.funk) )
close.thread.funk()
}, ... )
# return
NULL
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.