Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 30 additions & 69 deletions DIMS/HMDBparts.R
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
# adapted from hmdb_parts.R

# define parameters
cmd_args <- commandArgs(trailingOnly = TRUE)

db_file <- cmd_args[1]
breaks_file <- cmd_args[2]
standard_run <- cmd_args[4]
standard_run <- cmd_args[3]

# load file with binning breaks
load(breaks_file)
Expand All @@ -15,8 +13,8 @@ max_mz <- round(breaks_fwhm[length(breaks_fwhm)])
# In case of a standard run use external HMDB parts
# m/z is approximately 70 to 600: set limits between 68-71 for min and 599-610 for max
if (standard_run == "yes" & min_mz > 68 & min_mz < 71 & max_mz > 599 & max_mz < 610) {
# skip generating HMDB parts
hmdb_parts_path <- cmd_args[3]
# skip generating HMDB parts; copy them from hmdb_parts_path
hmdb_parts_path <- cmd_args[4]
# find all files containing hmdb in file name
hmdb_parts <- list.files(hmdb_parts_path, pattern = "hmdb")
for (hmdb_file in hmdb_parts) {
Expand All @@ -25,8 +23,24 @@ if (standard_run == "yes" & min_mz > 68 & min_mz < 71 & max_mz > 599 & max_mz <
} else {
# generate HMDB parts in case of non-standard mz range
load(db_file)
ppm <- as.numeric(cmd_args[5])

# determine segments of m/z for HMDB parts; smaller parts for m/z < 100
mz_segments <- c()
segment_start <- min_mz
segment_end <- min_mz + 5
while (segment_end < max_mz) {
if (segment_start < 100) {
segment_size = 5
} else {
segment_size = 10
}
mz_segments <- c(mz_segments, segment_start)
segment_start <- segment_start + segment_size
segment_end <- segment_end + segment_size
}
#last segment
mz_segments <- c(mz_segments, max_mz)

scanmodes <- c("positive", "negative")
for (scanmode in scanmodes) {
if (scanmode == "negative") {
Expand All @@ -38,73 +52,20 @@ if (standard_run == "yes" & min_mz > 68 & min_mz < 71 & max_mz > 599 & max_mz <
}

# filter mass range meassured
hmdb_add_iso = hmdb_add_iso[which(hmdb_add_iso[ , column_label] >= breaks_fwhm[1] &
hmdb_add_iso[ , column_label] <= breaks_fwhm[length(breaks_fwhm)]), ]
hmdb_add_iso = hmdb_add_iso[which(hmdb_add_iso[ , column_label] >= min_mz &
hmdb_add_iso[ , column_label] <= max_mz), ]

# sort on mass
outlist <- hmdb_add_iso[order(as.numeric(hmdb_add_iso[ , column_label])),]
nr_rows <- dim(outlist)[1]

# maximum number of rows per file
sub <- 20000
end <- 0
last_line <- sub
check <- 0
outlist_part <- NULL
sorted_hmdb_add_iso <- hmdb_add_iso[order(as.numeric(hmdb_add_iso[ , column_label])),]
nr_rows <- dim(sorted_hmdb_add_iso)[1]

# create parts and save to file
if (nr_rows < sub) {
outlist_part <- outlist
save(outlist_part, file = paste0(scanmode, "_hmdb.1.RData"))
} else if (nr_rows >= sub & (floor(nr_rows / sub) - 1) >= 2) {
for (i in 2:floor(nr_rows / sub) - 1) {
start <- -(sub - 1) + i * sub
end <- i * sub

if (i > 1){
outlist_i = outlist[c(start:end),]
nr_moved = 0
# Use ppm to replace border to avoid cut within peak group
while ((as.numeric(outlist_i[1, column_label]) - as.numeric(outlist_part[last_line, column_label])) * 1e+06 /
as.numeric(outlist_i[1, column_label]) < ppm) {
outlist_part <- rbind(outlist_part, outlist_i[1, ])
outlist_i <- outlist_i[-1, ]
nr_moved <- nr_moved + 1
}

save(outlist_part, file = paste(scanmode, "_", paste("hmdb", i-1, "RData", sep = "."), sep = ""))
check <- check + dim(outlist_part)[1]

outlist_part <- outlist_i
last_line <- dim(outlist_part)[1]

} else {
outlist_part <- outlist[c(start:end),]
}
}

start <- end + 1
end <- nr_rows
outlist_i <- outlist[c(start:end), ]
nr_moved <- 0

if (!is.null(outlist_part)) {
# Calculate ppm and replace border, avoid cut within peak group
while ((as.numeric(outlist_i[1, column_label]) - as.numeric(outlist_part[last_line, column_label])) * 1e+06 /
as.numeric(outlist_i[1, column_label]) < ppm) {
outlist_part <- rbind(outlist_part, outlist_i[1, ])
outlist_i <- outlist_i[-1, ]
nr_moved <- nr_moved + 1
}

save(outlist_part, file = paste0(scanmode, "_hmdb.", i, ".RData"))
check <- check + dim(outlist_part)[1]
}

outlist_part <- outlist_i
save(outlist_part, file = paste0(scanmode, "_hmdb.", i + 1, ".RData"))
check <- check + dim(outlist_part)[1]
for (mz_part_index in 1:(length(mz_segments) - 1)) {
mz_start <- mz_segments[mz_part_index]
mz_end <- mz_segments[mz_part_index + 1]
outlist_part <- sorted_hmdb_add_iso[sorted_hmdb_add_iso[ , column_label] > mz_start &
sorted_hmdb_add_iso[ , column_label] <= mz_end, ]
save(outlist_part, file = paste0(scanmode, "_hmdb.", mz_part_index, ".RData"))
}
}
}

2 changes: 1 addition & 1 deletion DIMS/HMDBparts.nf
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@ process HMDBparts {

script:
"""
Rscript ${baseDir}/CustomModules/DIMS/HMDBparts.R $hmdb_db_file $breaks_file $params.hmdb_parts_files $params.standard_run $params.ppm
Rscript ${baseDir}/CustomModules/DIMS/HMDBparts.R $hmdb_db_file $breaks_file $params.standard_run $params.hmdb_parts_files
"""
}
Loading