require(rtracklayer)
require(plyr)
require(dplyr)
require(seqinr)
require(GenomicRanges)
require(ggplot2)
require(gtable)
#require(Gviz)
source('Location.R')
source('comp.fun.R')
diro = file.path(Sys.getenv("misc3"), "comp.stat")
#qnames = c("HM056", "HM056.AC", "HM340", "HM340.AC")
#qnames = c("HM056", "HM324", "Malbus")
tlen = read.table(tcfg$size, sep = "\t", header = F, as.is = T)
grt = with(tlen, GRanges(seqnames = V1, ranges = IRanges(1, end = V2)))
tt = data.frame(chr = tlen$V1, beg = 1, end = tlen$V2)
tgap = read.table(tcfg$gap, sep = "\t", header = F, as.is = T)
grp = with(tgap, GRanges(seqnames = V1, ranges = IRanges(V2, end = V3)))
tp = data.frame(chr = tgap$V1, beg = tgap$V2, end = tgap$V3)
###### basic assembly stats
fr = file.path(diro, "00.sequencing.raw.tbl")
tr = read.table(fr, header = T, sep = "\t", as.is = T)
accs = sapply(strsplit(tr$label, split = "_"), "[", 1)
accs[accs == "HM018"] = "HM023"
idxs = accs %in% qnames
tsa = cbind(org = accs[idxs], tr[idxs,])
tsa$insert_size = as.numeric(tsa$insert_size)
libtype = rep(NA, nrow(tsa))
libtype[! is.na(tsa$insert_size) & tsa$insert_size > 300] = 'LIPE'
libtype[! is.na(tsa$insert_size) & tsa$insert_size <= 300] = 'SIPE'
idxs_sipe = unique(c(grep("SIPE", tsa$label), grep("SIPE", tsa$label.1)))
idxs_lipe = unique(c(grep("LIPE", tsa$label), grep("LIPE", tsa$label.1)))
libtype[is.na(tsa$insert_size) & 1:length(libtype) %in% idxs_sipe] = "SIPE"
libtype[is.na(tsa$insert_size) & 1:length(libtype) %in% idxs_lipe] = "LIPE"
tsa = cbind(tsa, libtype = libtype)
to1 = ddply(tsa, .(org, libtype), summarise, Gb = sum(filtered_count/1000000000*cycles*2))
to = reshape(to1, direction = 'wide', timevar = 'libtype', idvar = 'org')
reflength <- 500000000
to = cbind(to, coverage = (to$Gb.LIPE + to$Gb.SIPE) / (reflength/1000000000))
fr2 = file.path(diro, "00.assembly.raw.tbl")
tr2 = read.table(fr2, header = T, sep = "\t", as.is = T)
tr2$Accession[tr2$Accession == "HM018"] = "HM023"
to = merge(to, tr2[,c('Accession', 'SIPE.status', 'LIPE.status')], by.x = 'org', by.y = 'Accession', all.x = T)
too = data.frame('org' = to$org, 'SIPE' = format(to$Gb.SIPE, digits = 3, nsmall = 1), 'LIPE' = format(to$Gb.LIPE, digits = 3, nsmall = 1), 'SIPE detail' = to$SIPE.status, 'LIPE detail' = to$LIPE.status, 'Total coverage (fold)' = format(to$coverage, digits = 3, nsmall = 1), stringsAsFactors = F)
fo = file.path(diro, "00.stat.tbl")
write.table(too, fo, sep = "\t", row.names = F, col.names = T, quote = F)
library(Biostrings)
source(paste("http://faculty.ucr.edu/~tgirke/",
"Documents/R_BioCond/My_R_Scripts/contigStats.R", sep=''))
reflength <- 500000000
stats = list()
for (qname in c(tname, qnames)) {
dir = sprintf("%s/%s", Sys.getenv("genome"), qname)
flen = file.path(dir, "15.sizes")
tlen = read.table(flen, sep = "\t", header = F, as.is = T)
total_len = sum(tlen$V2)
fgap = file.path(dir, "16.gap.bed")
tgap = read.table(fgap, sep = "\t", header = F, as.is = T)
total_gap = sum(tgap$V3 - tgap$V2)
total_bases = total_len - total_gap
if(qname == tname) {
scf_stat = rep(0, 4)
ctg_stat = rep(0, 4)
} else {
f_scf = file.path(dir, "11_genome.fas")
assembly <- readDNAStringSet(f_scf, "fasta")
N <- list(acc = width(assembly))
# reflength <- sapply(N, sum)
st <- contigStats(N = N, reflength = reflength, style = "data")
scf_stat = as.integer(st$Contig_Stats)[c(8,2,6,4)]
f_ctg = file.path(dir, "ctg.fas")
assembly <- readDNAStringSet(f_ctg, "fasta")
N <- list(acc = width(assembly))
# reflength <- sapply(N, sum)
st <- contigStats(N = N, reflength = reflength, style = "data")
ctg_stat = as.integer(st$Contig_Stats)[c(8,2,6,4)]
}
# repeat stats
frep = file.path(dir, "12.rm.bed")
trep = read.table(frep, sep = "\t", header = F, as.is = T)
brep = sum(trep$V3 - trep$V2)
pct_rep = brep / total_bases * 100
pct_rep = sprintf("%.02f", pct_rep)
stat = c(total_len, total_bases, scf_stat, ctg_stat, brep)
stat = format(stat, big.mark = ",")
stat = c(stat, pct_rep)
stats[[qname]] = matrix(stat, nrow = 1,
dimnames = list(NULL, c("Total Span", "Total Bases",
"Number", "N50", "Median", "Max", "Number", "N50", "Median", "Max",
"Bases", "Percent")))
}
ds = do.call(rbind.data.frame, stats)
fo = file.path(diro, "01_assembly_stat.tbl")
write.table(ds, fo, sep = "\t", row.names = T, col.names = T, quote = F)
for (qname in qnames) {
dir = sprintf("%s/%s", Sys.getenv("genome"), qname)
flen = file.path(dir, "15.sizes")
tlen = read.table(flen, sep = "\t", header = F, as.is = T)
total_len = sum(tlen$V2)
cat(qname, sum(tlen$V2[tlen$V2 >= 100000]) / total_len, "\n")
}
tc = read.table(tcfg$gene, header = F, sep = "\t", as.is = T)
colnames(tc) = c("chr", "beg", "end", "srd", "id", "type", "fam")
grc = with(tc[tc$type == 'cds',], GRanges(seqnames = chr, ranges = IRanges(beg, end = end)))
stats = list()
for (qname in qnames) {
fgax = sprintf("%s/%s_HM101/23_blat/31.5/gax", Sys.getenv("misc3"), qname)
tgax = read.table(fgax, header = F, sep = "\t", as.is = T)[,1:3]
grg = with(tgax, GRanges(seqnames = V1, ranges = IRanges(V2, end = V3)))
bp_cov = sum(width(reduce(grg)))
bp_cov_cds = sum(width(GenomicRanges::intersect(grg, grc)))
stats[[qname]] = matrix(c(bp_cov, bp_cov_cds), nrow = 1, dimnames = list(NULL,
c("bp_cov", "bp_cov_cds")))
}
ds = do.call(rbind.data.frame, stats)
ungap = sum(tt$end) - sum(tp$end - tp$beg + 1)
ds = cbind(ds, pct_cov = ds$bp_cov / ungap, pct_cov_cds = ds$bp_cov_cds / sum(width(reduce(grc))))
##### functional annotation stat
stats = list()
statd = list()
nstats = list()
rstats = list()
cstats = list()
tstats = list()
orgs_rnaseq = c("HM034", "HM056", "HM101", "HM340")
fi = file.path(Sys.getenv("misc2"), "genefam", "nbs.info")
nfams = read.table(fi, header = T, sep = "\t", as.is = T)[,1]
fi = file.path(Sys.getenv("misc2"), "genefam", "crp.info")
cfams = read.table(fi, header = T, sep = "\t", as.is = T)[,1]
fi = file.path(Sys.getenv("misc2"), "genefam", "rlk.info")
rfams = read.table(fi, header = T, sep = "\t", as.is = T)[,1]
fi = file.path(Sys.getenv("data"), 'db', 'pfam', 'genefam.tsv')
ti = read.table(fi, header = T, sep = "\t", as.is = T)
tfams = ti$dom[ti$fam == 'TE']
fams = unique(ti$fam[order(ti$pri)])
for (qname in c(tname, qnames_15)) {
dir = sprintf("%s/%s", Sys.getenv("genome"), qname)
fg = file.path(dir, "51.gtb")
tg = read.table(fg, sep = "\t", header = T, as.is = T)[,c(1:6,15:17)]
ngene = nrow(tg)
ids_nonte = tg$id[tg$cat2 != "TE"]
n_nonte = length(ids_nonte)
dtn = table(tg$cat2)
x = c()
x[fams] = 0
x[names(dtn)] = dtn
statd[[qname]] = matrix(c(x, ngene),
nrow = 1, dimnames = list(NULL, c(fams, "Total Genes")))
tf = file.path(dir, "51.fas")
y = read.fasta(tf, as.string = TRUE, seqtype = "AA")
dl = ldply(y, nchar)
lens = dl$V1[dl$.id %in% ids_nonte]
mean_prot = mean(lens)
med_prot = median(lens)
tt = tg[tg$cat2 == 'TE',]
dtn = table(tt$cat3)
dtn = dtn[names(dtn) != '']
x = c()
x[tfams] = 0
x[names(dtn)] = dtn
tstats[[qname]] = matrix(x, nrow = 1, dimnames = list(NULL, tfams))
n_te = nrow(tt)
tn = tg[tg$cat2 %in% c('CC-NBS-LRR', 'TIR-NBS-LRR'),]
dtn = table(tn$cat3)
x = c()
x[nfams] = 0
x[names(dtn)] = dtn
nstats[[qname]] = matrix(x, nrow = 1, dimnames = list(NULL, nfams))
n_nbs = nrow(tn)
tr = tg[tg$cat2 %in% c('RLK','LRR-RLK'),]
dtn = table(tr$cat3)
x = c()
x[rfams] = 0
x[names(dtn)] = dtn
rstats[[qname]] = matrix(x, nrow = 1, dimnames = list(NULL, rfams))
n_rlk = nrow(tr)
tc = tg[tg$cat2 %in% c('CRP0000-1030','NCR','CRP1600-6250'),]
dtn = table(tc$cat3)
x = c()
x[cfams] = 0
x[names(dtn)] = dtn
cstats[[qname]] = matrix(x, nrow = 1, dimnames = list(NULL, cfams))
n_ncr = sum(tc$cat2 == 'NCR')
tf = tg[tg$cat2 %in% c('F-box'),]
n_fbx = nrow(tf)
ids = ids_nonte
ids_rna = c(); ids_hom = c()
if(qname %in% orgs_rnaseq) {
org = qname
ff = file.path(Sys.getenv("misc2"), "rnaseq/mt/31_cufflinks", org, 'isoforms.fpkm_tracking')
tf = read.table(ff, header = T, sep = "\t", as.is = T)
ids_rna = tf$tracking_id[tf$tracking_id %in% ids & tf$FPKM > 0]
p_rna = sprintf("%.01f", length(ids_rna) / length(ids) * 100)
} else {
p_rna = '-'
}
if(qname != tname) {
ft = sprintf("%s/%s_HM101/51_ortho/31.ortho.tbl", Sys.getenv("misc3"), qname)
tt = read.table(ft, header = T, sep = "\t", as.is = T)
ids_hom = tt$qid[tt$qid %in% ids]
p_hom = sprintf("%.01f", length(ids_hom) / length(ids) * 100)
} else {
p_hom = '-'
}
p_sup = sprintf("%.01f", length(unique(c(ids_rna, ids_hom))) / length(ids) * 100)
stats[[qname]] = matrix(c(ngene, n_te, n_nonte, n_nbs, n_fbx, n_rlk, n_ncr, med_prot, p_rna, p_hom, p_sup), nrow = 1, dimnames = list(NULL, c("# Total Genes", 'TE', 'non-TE','NBS-LRR','F-box','LRR-RLK','NCR',"Median Prot Length", 'RNA-seq (%)', 'Homology (%)', 'RNA-seq + Homology (%)')))
}
ds = do.call(rbind.data.frame, stats)
#for (i in 1:ncol(do)) { do[,i] = format(do[,i], big.mark = ",") }
fo = file.path(diro, "03_annotation.tbl")
write.table(ds, fo, sep = "\t", row.names = T, col.names = T, quote = F)
ds = do.call(rbind.data.frame, statd)
fo = file.path(diro, "04.annotation.detail.tbl")
write.table(t(ds), fo, sep = "\t", row.names = T, col.names = T, quote = F)
ds = do.call(rbind.data.frame, nstats)
colsums = apply(ds, 2, sum)
do = ds[,colsums>0]
do = t(do)
do = cbind('sub-family' = rownames(do), do)
fo = file.path(diro, "04.nbs.tbl")
write.table(do, fo, sep = "\t", row.names = F, col.names = T, quote = F)
ds = do.call(rbind.data.frame, rstats)
colsums = apply(ds, 2, sum)
do = ds[,colsums>0]
do = t(do)
do = cbind('sub-family' = rownames(do), do)
fo = file.path(diro, "04.rlk.tbl")
write.table(do, fo, sep = "\t", row.names = F, col.names = T, quote = F)
ds = do.call(rbind.data.frame, cstats)
colsums = apply(ds, 2, sum)
do = ds[,colsums>0]
do = t(do)
do = cbind('sub-family' = rownames(do), do)
fo = file.path(diro, "04.crp.tbl")
write.table(do, fo, sep = "\t", row.names = F, col.names = T, quote = F)
ds = do.call(rbind.data.frame, tstats)
colsums = apply(ds, 2, sum)
do = ds[,colsums>0]
do = t(do)
do = cbind('sub-family' = rownames(do), do)
fo = file.path(diro, "04.te.tbl")
write.table(do, fo, sep = "\t", row.names = F, col.names = T, quote = F)
##### comparative / novel-seq stats
stats = list()
for (qname in qnames) {
#add repeatmasker stats
dir = sprintf("%s/%s", Sys.getenv("genome"), qname)
flen = file.path(dir, "15.sizes")
tlen = read.table(flen, sep = "\t", header = F, as.is = T)
total_len = sum(tlen$V2)
fgap = file.path(dir, "16.gap.bed")
tgap = read.table(fgap, sep = "\t", header = F, as.is = T)
total_gap = sum(tgap$V3 - tgap$V2)
total_bases = total_len - total_gap
fcds = file.path(dir, "51.tbl")
tcds = read.table(fcds, sep = "\t", header = F, as.is = T)[,1:6]
tcds = tcds[tcds$V6 == 'cds',]
grc = GRanges(seqnames = tcds$V1, ranges = IRanges(tcds$V2, end = tcds$V3))
grc = reduce(grc)
frep = file.path(dir, "12.rm.bed")
trep = read.table(frep, sep = "\t", header = F, as.is = T)
brep = sum(trep$V3 - trep$V2)
pct_rep = brep / total_bases * 100
dir = sprintf("%s/%s_%s/23_blat", Sys.getenv("misc3"), qname, tname)
fi = file.path(dir, '41.5/gax')
ti = read.table(fi, header = F, sep = "\t", as.is = T)[,c(1:3,10)]
colnames(ti) = c('tid', 'tbeg', 'tend', 'lev')
aligned = sum(ti$tend - ti$tbeg + 1)
# pct_aligned = aligned / total_bases * 100
fi = file.path(dir, '31.9/gax')
ti = read.table(fi, header = F, sep = "\t", as.is = T)[,c(1:3,10)]
colnames(ti) = c('tid', 'tbeg', 'tend', 'lev')
ti = ti[ti$lev <= 2,]
synteny = sum(ti$tend - ti$tbeg + 1)
# pct_synteny = synteny / total_bases * 100
dir = sprintf("%s/%s_%s/41_novseq", Sys.getenv("misc3"), qname, tname)
fi = file.path(dir, '21.bed')
ti = read.table(fi, sep = "\t", header = F, as.is = T)
bnov = sum(ti$V3 - ti$V2)
pnov = bnov / total_bases * 100
grn = GRanges(seqnames = ti$V1, ranges = IRanges(ti$V2+1, end = ti$V3))
grn = reduce(grn)
bcds = sum(width(intersect(grc, grn)))
pcds = bcds / bnov * 100
total_bases = format(total_bases, big.mark = ",")
brep = format(brep, big.mark = ",")
# pct_rep = sprintf("%.01f%%", pct_rep)
aligned = format(aligned, big.mark = ",")
# pct_aligned = sprintf("%.01f%%", pct_aligned)
synteny = format(synteny, big.mark = ",")
# pct_synteny = sprintf("%.01f%%", pct_synteny)
bnov = format(bnov, big.mark = ",")
pnov = sprintf("%.01f%%", pnov)
bcds = format(bcds, big.mark = ",")
pcds = sprintf("%.01f%%", pcds)
stats[[qname]] = matrix(c(total_bases, brep, aligned, synteny, bnov, pnov, bcds, pcds),
nrow = 1, dimnames = list(NULL, c("Total Bases", "Repetitive", "Alignable to HM101", "Bases in Synteny Blocks", "Novel Sequences", "", "Novel Coding Seq", "")))
}
ds = do.call(rbind.data.frame, stats)
fo = file.path(diro, "11_comp_stat.tbl")
write.table(ds, fo, sep = "\t", row.names = T, col.names = T, quote = F)
##### variant stats
stats_bp = list()
stats_cnt = list()
stats_snpd = list()
for (qname in qnames) {
dir = sprintf("%s/%s_%s", Sys.getenv("misc3"), qname, tname)
fi = file.path(dir, '23_blat/31.9/gax')
ti = read.table(fi, header = F, sep = "\t", as.is = T)[,c(1:3,10)]
colnames(ti) = c('tid', 'tbeg', 'tend', 'lev')
ti = ti[ti$lev == 1,]
ali = sum(ti$tend - ti$tbeg + 1)
fi = file.path(dir, '23_blat/31.9/snp')
ti = read.table(fi, header = F, sep = "\t", as.is = T)[,c(1:2,8)]
colnames(ti) = c('chr', 'beg', 'lev')
snp = sum(ti$lev == 1)
snpd = snp / ali
fv = file.path(dir, '31_sv/05.stx')
tv = read.table(fv, header = T, sep = "\t", as.is = T)
ti = tv[tv$type == 'INS',]
ti = cbind(ti, len = ti$qend - ti$qbeg + 1)
tis = ti[ti$len < 50,]
si_n = nrow(tis); si_b = sum(tis$len)
til = ti[ti$len >= 50,]
li_n = nrow(til); li_b = sum(til$len)
ti = tv[tv$type == 'CNG',]
ti = cbind(ti, len = ti$qend - ti$qbeg + 1)
g_n = nrow(ti); g_b = sum(ti$len)
ti = tv[tv$type == 'DEL',]
ti = cbind(ti, len = ti$tend - ti$tbeg + 1)
tis = ti[ti$len < 50,]
sd_n = nrow(tis); sd_b = sum(tis$len)
til = ti[ti$len >= 50,]
ld_n = nrow(til); ld_b = sum(til$len)
ti = tv[tv$type == 'CNL',]
ti = cbind(ti, len = ti$tend - ti$tbeg + 1)
l_n = nrow(ti); l_b = sum(ti$len)
ti = tv[tv$type %in% c('TLC:INS', 'TLC:DEL'),]
gri = with(ti, GRanges(seqnames = tchr, ranges = IRanges(tbeg, end = tend)))
grr = reduce(gri)
t_n = length(grr); t_b = sum(width(grr))
# stat = c(snp, si_n, si_b, sd_n, sd_b, li_n, li_b, ld_n, ld_b,
# g_n, g_b, l_n, l_b, t_n, t_b)
# stat = format(stat, big.mark = ",")
# stat = c(stat[1], snpd, stat[-1])
cnames = c("SNP", "sIns", "sDel", "lIns", "lDel", "CNG", "CNL", "TLC")
stats_cnt[[qname]] = matrix(c(snp, si_n, sd_n, li_n, ld_n, g_n, l_n, t_n), nrow = 1, dimnames = list(NULL, cnames))
stats_bp[[qname]] = matrix(c(snp, si_b, sd_b, li_b, ld_b, g_b, l_b, t_b), nrow = 1, dimnames = list(NULL, cnames))
stats_snpd[[qname]] = snpd
}
tcn = do.call(rbind.data.frame, stats_cnt)
tbp = do.call(rbind.data.frame, stats_bp)
tsd = data.frame(org = qnames, snpd = as.numeric(unlist(stats_snpd[qnames])))
t1 = cbind(org = sprintf("%s(#)", qnames), format(tcn, big.mark = ","))
t2 = cbind(org = sprintf("%s(bp)", qnames), format(tbp, big.mark = ","))
t2$SNP = ''
t3 = rbind(t1, t2)
snpd = rep('', nrow(t3))
snpd[t3$SNP != ''] = sub("^(-?)0.", "\\1.", sprintf("%.04f", tsd$snpd))
t4 = cbind(t3[,1:2], snpd = snpd, t3[,3:ncol(t3)])
t4$snpd[t4$SNP==''] = ''
colnames(t4)[c(3:7,10)] = c("SNP Density", "Small Ins", "Small Del", "Large Ins", "Large Del", "Translocation")
fo = file.path(diro, "15_vnt_stat.tbl")
write.table(t4, fo, sep = "\t", row.names = F, col.names = T, quote = F)
##### correlation of genome size estimates with Kelly's data
tsa = data.frame(org = c(tname, qnames), tspan = NA, tbase = NA, stringsAsFactors = F)
for (i in 1:nrow(tsa)) {
org = tsa$org[i]
dir = sprintf("%s/%s", Sys.getenv("genome"), org)
flen = file.path(dir, "15.sizes")
tlen = read.table(flen, sep = "\t", header = F, as.is = T)
tsa$tspan[i] = sum(tlen$V2)
fgap = file.path(dir, "16.gap.bed")
tgap = read.table(fgap, sep = "\t", header = F, as.is = T)
total_gap = sum(tgap$V3 - tgap$V2)
tsa$tbase[i] = tsa$tspan[i] - total_gap
}
fsk = file.path(diro, "genome_size", "01.kelly.tbl")
tsk1 = read.table(fsk, header = T, sep = "\t", as.is = T)
tsk = ddply(tsk1, .(org), summarise, wt = mean(wt))
tsk$org[tsk$org == 'HM029'] = 'HM340'
tsk$org[tsk$org == 'HM018'] = 'HM023'
to = merge(tsa, tsk, by = "org")
to = cbind(to, tspanr = to$tspan / mean(to$tspan))
fit = lm(tspanr ~ wt, data = to)
fitsum = summary(fit)
f = fitsum$fstatistic
pval = pf(f[1], f[2], f[3], lower.tail = F)
attributes(pval) <- NULL
txt = sprintf("'R'^2*' = %.03f p-value = %.04f'", fitsum$r.squared, pval)
corc = cor(to$tspanr, to$wt)
pval = cor.test(to$tspanr, to$wt)$p.value
txt = sprintf("correlation coefficient = %.03f p-value = %.04f", corc, pval)
p = ggplot(to) +
geom_point(aes(x = wt, y = tspanr), shape = 4, size = 2) +
# geom_text(aes(x = n_org, y = 0, label = org), geom_params=list(size = 2.5, vjust = 0, angle = 30)) +
stat_smooth(aes(x = wt, y = tspanr), method = 'lm', fill = 'azure3', size = 0.5) +
# scale_shape(name = "", solid = FALSE, guide = F) +
# scale_color_manual(name = "", labels = c('Core-genome', 'Pan-genome'), values = c("dodgerblue", "firebrick1"), guide = guide_legend(label.position = "left", direction = "horizontal", title.theme = element_text(size = 8, angle = 0), label.theme = element_text(size = 8, angle = 0))) +
scale_x_continuous(name = 'Genome size estimate (fluorometry)') +
scale_y_continuous(name = 'AllPaths assembly size (normailized)', expand = c(0.02, 0)) +
theme_bw() +
theme(axis.ticks.length = unit(0, 'lines'), axis.ticks.margin = unit(0.2, 'lines')) +
theme(legend.position = "top", legend.key.size = unit(1, 'lines'), legend.background = element_rect(fill = 'white', size=0), legend.margin = unit(0, "line")) +
theme(plot.margin = unit(c(0,1,0,0), "lines")) +
theme(axis.title.x = element_text(size = 10, angle = 0)) +
theme(axis.title.y = element_text(size = 10, angle = 90)) +
theme(axis.text.x = element_text(size = 9, color = "dodgerblue4")) +
theme(axis.text.y = element_text(size = 9, color = "dodgerblue4", angle = 0, hjust = 0.5)) +
annotate("text", x = 0.97, y = 1.05, size = 3, hjust = 0, label = txt, parse = F)
fp = file.path(diro, "genome.size.pdf")
ggsave(p, filename = fp, width = 6, height = 5)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.