-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbreaktigs.rmd
69 lines (61 loc) · 1.54 KB
/
breaktigs.rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
---
title: Break scaffolds at loci not supported by molecules
author: Shaun Jackman
output:
html_document:
keep_md: true
params:
input_tsv:
label: "Input TSV file of breakpoints"
value: "breakpoints.tsv"
input: text
input_fai:
label: "Input FAI file of scaffold sizes"
value: "draft.fa.fai"
input: text
output_bed:
label: "Output BED file of breaktig coordinates"
value: "breakpoints.tigs.bed"
input: text
---
```{r setup, message=FALSE}
library(dplyr)
library(knitr)
library(readr)
library(tidyr)
library(uniqtag)
knit_print.data.frame <- function(x, ...) kable(x) %>% paste(collapse = "\n") %>% asis_output
input_tsv <- params$input_tsv
input_fai <- params$input_fai
output_bed <- params$output_bed
```
# Read the data
```{r read-data}
input_tsv
breakpoints_orig <- read_tsv(input_tsv, col_types = cols(Rname = col_character()))
glimpse(breakpoints_orig)
breakpoints <- breakpoints_orig %>% select(Rname, Pos)
input_fai
fai <- read_tsv(input_fai, col_names = c("Rname", "Pos"), col_types = "ci---")
glimpse(fai)
```
# Determine coordinates of subsequences
```{r transform}
bed <- rbind(breakpoints, mutate(fai, Pos = 0L), fai) %>%
arrange(!is.na(as.integer(Rname)), as.integer(Rname), Rname, Pos) %>%
mutate(Start = Pos, End = lead(Pos)) %>%
select(-Pos) %>%
drop_na() %>%
filter(Start <= End) %>%
mutate(Name = make_unique(Rname))
glimpse(bed)
```
# Write the BED file to disk
```{r write-results}
output_bed
bed %>% write_tsv(output_bed, col_names = FALSE)
```
# Coordinates of subsequences
```{r table}
bed
```