-
Notifications
You must be signed in to change notification settings - Fork 1
/
annotation-commands.R
125 lines (86 loc) · 2.88 KB
/
annotation-commands.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
####biomaRt
library(biomaRt)
listMarts()
ensembl <- useMart("ensembl", dataset="hsapiens_gene_ensembl")
listDatasets(ensembl)
listFilters(ensembl)
flt <- listFilters(ensembl)
flt[grep("entrez",flt[,1]),]
listAttributes(ensembl)
entrez <- c("673", "837")
attr <- c("entrezgene","hgnc_symbol","ensembl_gene_id","description")
myInfo <- getBM(filters="entrezgene",values=entrez,attributes=attr,
mart=ensembl)
myInfo
getBM(c("ensembl_gene_id", "hgnc_symbol","entrezgene"),
filters = c("chromosome_name","start","end"),
values = list(16,1100000,1250000),mart=ensembl)
getBM(c("ensembl_gene_id", "start_position","end_position"),
filters = c("ensembl_gene_id"),
values = c("ENSG00000260702","ENSG00000260532","ENSG00000181791"),
ensembl)
#####Genome sequence packages
library(BSgenome.Hsapiens.UCSC.hg19)
hg19 <- BSgenome.Hsapiens.UCSC.hg19
gr <- GRanges("chr16", IRanges(1100000, 1250000))
getSeq(hg19, gr)
####Organism-level packages
library(org.Hs.eg.db)
columns(org.Hs.eg.db)
keytypes(org.Hs.eg.db)
select(org.Hs.eg.db, keys=entrez,
keytype="ENTREZID",
columns=c("SYMBOL",
"CHR", "CHRLOC",
"CHRLOCEND"))
select(org.Hs.eg.db, keys = "GO:0003674",
keytype = "GO", columns = "SYMBOL")
####Transcript databases
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
txdb
columns(txdb)
keytypes(txdb)
select(txdb, keys=entrez,
keytype="GENEID",
columns=c("TXID",
"TXCHROM", "TXSTART",
"TXEND"))
mygene <- select(txdb, keys = "673", keytype = "GENEID",
columns = c("EXONID", "EXONCHROM", "EXONSTART","EXONEND"))
mygene
GRanges(mygene$EXONCHROM, IRanges(mygene$EXONSTART,
mygene$EXONEND))
trs <- transcripts(txdb)
trs
exs <- exons(txdb)
exs
exons <- exonsBy(txdb, "gene")
is(exons)
length(exons)
exons[["673"]]
gr <- exons[["49"]]
gr <- renameSeqlevels(gr,value = c("chr22"=22))
library(GenomicAlignments)
mybam <- "/home//participant/Course_Materials/Practicals/exampleData/NA19914.chr22.bam"
system.time(bam.sub <- readGAlignments(file = mybam,use.names = TRUE,param = ScanBamParam(which = gr)))
exonList <- split(gr, values(gr)$exon_id)
names(exonList)
exonList[[1]]
gr[1]
system.time(bam.sub2 <- lapply(exonList, function(x) readGAlignments(file=mybam,
use.names=TRUE,
param=ScanBamParam(which=x))))
names(bam.sub2)
bam.sub2[[1]]
system.time(seqs <- getSeq(hg19, exons[["49"]]))
seqs
width(gr)
bam <- readGAlignments(file = mybam)
countOverlaps(gr, bam)
library(ggbio)
autoplot(bam.sub)
autoplot(bam.sub,stat="coverage")
autoplot(txdb,which=exons[["49"]])
tracks(autoplot(txdb,which=exons[["49"]]),
autoplot(bam.sub,stat="coverage"))