-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathprep_all_study_qc.R
68 lines (56 loc) · 2.45 KB
/
prep_all_study_qc.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
##' Prepare per-study variant tables for QC summaries
##'
##' De-duplicates the supplied variant calls, restricts them to the examined
##' studies, assigns each call to a sample set, and counts variants per
##' sample. Sample sets with fewer observed samples than their hard-coded
##' expected total are padded with zero-count placeholder samples.
##'
##' Processing steps:
##' \enumerate{
##'   \item Keep one row per unique `chr`, `start`, `end`, `ref`, `alt`,
##'     `sample` combination.
##'   \item Strip the literal suffix `" et al."` from `study` and keep only
##'     Zhang, McEvoy, Kooi, Liu and Stachelek.
##'   \item Derive `sample_set`: Stachelek samples whose name contains `"CL"`
##'     become `"Stachelek CL"`, those containing `"T"` become
##'     `"Stachelek T"`; every other row uses its `study` value.
##'   \item Count rows per `sample_set` / `sample`.
##'   \item For every sample set present in the counts, add placeholder rows
##'     (random 5-character sample names, `n = 0`) until the number of samples
##'     reaches the hard-coded total for that set.
##' }
##'
##' Placeholder names come from `stringi::stri_rand_strings()`, so they depend
##' on the state of the random number generator.
##'
##' @title Prepare per-study variant QC tables
##' @param all_study_snvs A data frame of variant calls containing at least
##'   the columns `chr`, `start`, `end`, `ref`, `alt`, `sample` and `study`.
##' @return A named list with two elements:
##'   \describe{
##'     \item{vaf_per_study}{The de-duplicated, filtered variant table with an
##'       added character column `sample_set`.}
##'     \item{vars_per_study}{An ungrouped tibble with columns `sample_set`
##'       (factor with levels Zhang, Kooi, McEvoy, Liu, Stachelek CL,
##'       Stachelek T), `sample` and `n` (variant count, `0` for placeholder
##'       samples).}
##'   }
##' @author whtns
##' @export
prep_all_study_qc <- function(all_study_snvs) {
  examined_studies <- c("Zhang", "McEvoy", "Kooi", "Liu", "Stachelek")
  sample_set_levels <- c(
    "Zhang", "Kooi", "McEvoy", "Liu", "Stachelek CL", "Stachelek T"
  )

  # Hard-coded expected sample totals per sample set. "Afshar" is retained
  # from the original table but can never match, because that study is not in
  # `examined_studies`.
  study_numbers <- tibble::enframe(
    c(
      "Zhang" = 4,
      "McEvoy" = 10,
      "Kooi" = 71,
      "Stachelek CL" = 12,
      "Stachelek T" = 12,
      "Afshar" = 32,
      "Liu" = 63
    ),
    name = "sample_set",
    value = "sample_number"
  )

  vaf_per_study <-
    all_study_snvs %>%
    dplyr::distinct(chr, start, end, ref, alt, sample, .keep_all = TRUE) %>%
    # fixed(): match the literal suffix; as a regex the "." matched any char.
    dplyr::mutate(
      study = stringr::str_replace(study, stringr::fixed(" et al."), "")
    ) %>%
    dplyr::filter(study %in% examined_studies) %>%
    # Order matters: "CL" is tested before "T".
    dplyr::mutate(sample_set = dplyr::case_when(
      study == "Stachelek" & grepl("CL", sample) ~ "Stachelek CL",
      study == "Stachelek" & grepl("T", sample) ~ "Stachelek T",
      TRUE ~ study
    ))

  # One row per sample holding its variant count; ungrouped so that grouping
  # does not leak into the returned table.
  observed_counts <-
    vaf_per_study %>%
    dplyr::group_by(sample_set, sample) %>%
    dplyr::count() %>%
    dplyr::ungroup()

  # Number of placeholder samples each set still needs. Sets without an entry
  # in `study_numbers` get NA here and are dropped by the filter.
  # NOTE(review): a sample set with no variant rows at all never appears in
  # `observed_counts`, so it receives no placeholders -- confirm this is
  # intended.
  deficits <-
    observed_counts %>%
    dplyr::group_by(sample_set) %>%
    dplyr::summarise(count = dplyr::n()) %>%
    dplyr::left_join(study_numbers, by = "sample_set") %>%
    dplyr::mutate(zerod_samples = sample_number - count) %>%
    dplyr::filter(zerod_samples > 0)

  n_placeholders <- sum(deficits$zerod_samples)
  rows_to_add <- tibble::tibble(
    sample_set = rep(deficits$sample_set, times = deficits$zerod_samples),
    sample = stringi::stri_rand_strings(n_placeholders, 5),
    n = rep(0, n_placeholders)
  )

  vars_per_study <-
    dplyr::bind_rows(observed_counts, rows_to_add) %>%
    dplyr::mutate(sample_set = factor(sample_set, levels = sample_set_levels))

  list(vaf_per_study = vaf_per_study, vars_per_study = vars_per_study)
}