Nothing
#' as in Bakken et al. (2017 PMID 29322913) create gene signatures for k
#' cell types, each of which fails to express all but one gene in a set of k genes
#' @param idvec character vector of identifiers, must have names() set to identify cells bearing genes
#' @param conds character(2) tokens used to indicate condition to which signature element contributes
#' @param tags character vector of cell-type identifiers; for Cell Ontology use CL: as prefix,
#' one element for each element of idvec
#' @return a long data.frame
#' @examples
#' sigels = c("CL:X01"="GRIK3", "CL:X02"="NTNG1", "CL:X03"="BAGE2",
#' "CL:X04"="MC4R", "CL:X05"="PAX6", "CL:X06"="TSPAN12", "CL:X07"="hSHISA8",
#' "CL:X08"="SNCG", "CL:X09"="ARHGEF28", "CL:X10"="EGF")
#' sigdf = cyclicSigset(sigels)
#' head(sigdf)
#' @export
cyclicSigset = function(idvec, conds=c("hasExp", "lacksExp"), tags=paste0("CL:X", 1:length(idvec))) {
combdf = expand.grid(idvec, idvec,
stringsAsFactors=FALSE)
todrp = which(apply(combdf,1,function(x)x[1]==x[2]))
combdf = combdf[-todrp,]
lacks = split(combdf[,1], combdf[,2])
ins = unlist(lapply(split(combdf[,2], combdf[,2]), "[", 1))
inds = rep(tags, each=length(idvec))
typeid = names(idvec)
names(typeid) = as.character(idvec)
for (i in 1:length(lacks)) {
lacks[[i]] = data.frame(gene=c(as.character(ins[i]), lacks[[i]]), type=typeid[ins[i]], stringsAsFactors=FALSE)
}
ans = do.call(rbind, lacks)
ans = cbind(ans, cond=c(conds[1], rep(conds[2], length(idvec)-1)))
rownames(ans) = NULL
ans
}
# sigels = c("CL:X01"="GRIK3", "CL:X02"="NTNG1", "CL:X03"="BAGE2",
# "CL:X04"="MC4R", "CL:X05"="PAX6", "CL:X06"="TSPAN12", "CL:X07"="hSHISA8",
# "CL:X08"="SNCG", "CL:X09"="ARHGEF28", "CL:X10"="EGF")
#' use output of cyclicSigset to generate a series of character vectors constituting OBO terms
#' @param ldf a 'long format' data.frame as created by cyclicSigset
#' @param propmap a character vector with names of elements corresponding to 'abbreviated' relationship
#' tokens and element values corresponding to full relationship-naming strings
#' @param sigels a named character vector associating cell types (names) to genes expressed in a cyclic set,
#' one element per type
#' @param prologMaker a function with arguments (id, ...), in which id is character(1),
#' that generates a vector of strings that will be used for each cell type-specific term.
#' @note ldfToTerms is not sufficiently general to produce terms for any reasonably
#' populated long data frame/propmap combination, but it is a working example for the
#' cyclic set context.
#' @return a character vector, strings can be concatenated to OBO
#' @examples
#' # a set of cell types -- names are cell type token, values are genes expressed in a
#' # cyclic set -- each cell type expresses exactly one gene in the set and fails to
#' # express all the other genes in the set. See Figs 3 and 4 of Bakken et al [PMID 29322913].
#' sigels = c("CL:X01"="GRIK3", "CL:X02"="NTNG1", "CL:X03"="BAGE2",
#' "CL:X04"="MC4R", "CL:X05"="PAX6", "CL:X06"="TSPAN12", "CL:X07"="hSHISA8",
#' "CL:X08"="SNCG", "CL:X09"="ARHGEF28", "CL:X10"="EGF")
#' # create the associated long data frame
#' ldf = cyclicSigset(sigels)
#' # describe the abbreviations
#' pmap = c("hasExp"="has_expression_of", lacksExp="lacks_expression_of")
#'
#' # now define the prolog for each cell type
#' makeIntnProlog = function(id, ...) {
#' # make type-specific prologs as key-value pairs
#' c(
#' sprintf("id: %s", id),
#' sprintf("name: %s-expressing cortical layer 1 interneuron, human", ...),
#' sprintf("def: '%s-expressing cortical layer 1 interneuron, human described via RNA-seq observations' [PMID 29322913]", ...),
#' "is_a: CL:0000099 ! interneuron",
#' "intersection_of: CL:0000099 ! interneuron")
#' }
#' tms = ldfToTerms(ldf, pmap, sigels, makeIntnProlog)
#' cat(tms[[1]], sep="\n")
#' @export
ldfToTerms = function(ldf, propmap, sigels,
prologMaker=function(id, ...) sprintf("id: %s", id) ) {
tms = split(ldf, ldf$type)
ctypes = names(tms)
lapply(1:length(ctypes), function(x) ldfToTerm(tms[[x]], propmap=propmap,
ctype=ctypes[x], gn=sigels[ctypes[x]], prologMaker))
}
#pmap = c("hasExp"="has_expression_of", lacksExp="lacks_expression_of")
ldfToTerm = function(ldf, propmap, ctype, gn, prologMaker) {
nty = length(unique(ldf$type))
stopifnot(nty==1)
lins = paste(propmap[ldf$cond], ": ",
PROSYM[match(ldf$gene, PROSYM$SYMBOL), "PRID"], " ! ", ldf$gene, sep="")
c("[Term]", prologMaker(ctype, gn), lins)
}
#aa = ldfToTerms(ldf, pmap, sigels)
#makeIntnProlog = function(id, ...) {
## make type-specific prologs as key-value pairs
# c(
# sprintf("id: %s", id),
# sprintf("name: %s-expressing cortical layer 1 interneuron, human", ...),
# sprintf("def: '%s-expressing cortical layer 1 interneuron, human described via RNA-seq observations' [PMID 29322913]", ...),
# "is_a: CL:0000099 ! interneuron",
# "intersection_of: CL:0000099 ! interneuron")
#}
#
#mm = makeProlog(names(sigels)[1], sigels[1])
#
#
#[Term]
#id: CL:X000001
#name: NTNG1-expressing cortical layer 1 interneuron, human
#def: 'cortical layer 1 interneuron described via RNA-seq observations' [PMID 29322913]
#is_a: CL:0000099 ! interneuron
#intersection_of: CL:0000099 ! interneuron
#has_expression_of: PR:000011467 ! NTNG1
#lacks_expression_of: PR:000008242 ! GRIK3
#lacks_expression_of: PR:000004625 ! BAGE2
#lacks_expression_of: PR:000001237 ! MC4R
#lacks_expression_of: PR:000012318 ! PAX6
#lacks_expression_of: PR:000016738 ! TSPAN12
#lacks_expression_of: PR:B8ZZ34 ! hSHISA8
#lacks_expression_of: PR:000015325 ! SNCG
#lacks_expression_of: PR:000013942 ! ARHGEF28
#lacks_expression_of: PR:000006928 ! EGF
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.