knitr::opts_chunk$set(echo = TRUE) library(tidyverse) library(Biostrings)
FMR1_kmer <- readRDS("data/FMR1dat/kmer_FMR1_compare.txt") d <- FMR1_kmer %>% dplyr::select(kmer, log2FC) %>% filter(!is.infinite(log2FC)) %>% dist(method = "euclidean") fit <- hclust(d, method="ward") plot(fit) groups <- cutree(fit, k=5) rect.hclust(fit, k=5, border="red") d <- FMR1_kmer %>% dplyr::select(kmer, log2FC) %>% filter(!is.infinite(log2FC), log2FC > 1) %>% as.data.frame() rownames(d) <- d$kmer d <- d[-1] %>% dist(method = "euclidean") fit <- hclust(d, method="ward") groups <- cutree(fit, k=4) rect.hclust(fit, k=4, border="red") d <- FMR1_kmer %>% dplyr::select(kmer, log2FC) %>% filter(!is.infinite(log2FC), log2FC > 1.5 | log2FC < -1.5) %>% as.data.frame() rownames(d) <- d$kmer d <- d[-1] %>% dist(method = "euclidean") fit <- hclust(d, method="ward") groups <- cutree(fit, k=4) rect.hclust(fit, k=4, border="red")
#BCrank creates motifs from ranked fasta files. importantly, ours are not ranked at all. usually BCRank is given all the sequences from chipseq peaks ranked on magnitude. we gave a much smaller fasta library(BCRANK) #FMR1_kmer %>% arrange(p_adj) %>% pull(., kmer) %>% as.character() %>% DNAStringSet() %>% writeXStringSet(., "data/FMR1dat/rankedKmer.fa") #didnt work with ranked kmers... bcrank_out <- bcrank("data/FMR1dat/longest_3UTR_FMR1.fa") topMotif <- toptable(bcrank_out, 1) weightMatrix <- pwm(topMotif, normalize=FALSE) weightMatrixNormalized <- pwm(topMotif, normalize=TRUE) library(seqLogo) seqLogo(weightMatrixNormalized)
#motifRG uses MEME-like algorithms to identify motifs enriched in a forground sequence set over a background set. #I think this is most usefull for us library(motifRG) #this is empty #FMR1_out <- findMotifFasta("data/FMR1dat/longest_3UTR_FMR1.fa", "data/FMR1dat/longest_3UTR_FMR1_ctrl.fa", max.motif = 3, enriched = TRUE) #ZBP1_out <- findMotifFasta("data/ZBP1dat/longest_whole_FL.fa","data/ZBP1dat/longest_whole_FL_ctrl.fa",max.motif=3,enriched=T) #saveRDS(ZBP1_out, "data/ZBP1dat/motifRGouput.txt") ZBP1_out < readRDS("data/ZBP1dat/motifRGoutput.txt") pwm.match <- refinePWMMotif(ZBP1_out$motifs[[1]]@match$pattern, readDNAStringSet("data/ZBP1dat/longest_whole_FL.fa")) seqLogo::seqLogo(pwm.match$model$prob) pwm.match <- refinePWMMotif(ZBP1_out$motifs[[2]]@match$pattern, readDNAStringSet("data/ZBP1dat/longest_whole_FL.fa")) seqLogo::seqLogo(pwm.match$model$prob) pwm.match <- refinePWMMotif(ZBP1_out$motifs[[3]]@match$pattern, readDNAStringSet("data/ZBP1dat/longest_whole_FL.fa")) seqLogo::seqLogo(pwm.match$model$prob)
#Gadem also identifies motifs enriched in a sequence set however there is no background set comparison library(rGADEM) library("BSgenome.Mmusculus.UCSC.mm10") #gadem <- GADEM(readDNAStringSet("data/FMR1dat/longest_3UTR_FMR1.fa"), verbose=1,genome=Mmusculus) #saveRDS(gadem, "FMR1gademoutput") gadem <- readRDS("FMR1gademoutput") getPWM(gadem)[[1]] %>% seqLogo::seqLogo() getPWM(gadem)[[2]] %>% seqLogo::seqLogo() getPWM(gadem)[[3]] %>% seqLogo::seqLogo()
library(universalmotif) #doesnt run on windows... run_meme(target.sequences = readDNAStringSet("data/FMR1dat/longest_3UTR_FMR1.fa"), ouput = "data/FMR1dat/MEME", control.sequences = readDNAStringSet("data/FMR1dat/longest_3UTR_FMR1_ctrl.fa"))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.