UMCUGenetics · mraves2 · Jul 15, 2025 · Jul 15, 2025 · Sep 30, 2025
diff --git a/DIMS/HMDBparts.R b/DIMS/HMDBparts.R
@@ -1,11 +1,9 @@
-# adapted from hmdb_parts.R
-
 # define parameters 
 cmd_args <- commandArgs(trailingOnly = TRUE)
 
 db_file <- cmd_args[1] 
 breaks_file <- cmd_args[2] 
-standard_run <- cmd_args[4] 
+standard_run <- cmd_args[3] 
 
 # load file with binning breaks 
 load(breaks_file)
@@ -15,8 +13,8 @@ max_mz <- round(breaks_fwhm[length(breaks_fwhm)])
 # In case of a standard run use external HMDB parts
 # m/z is approximately 70 to 600: set limits between 68-71 for min and 599-610 for max
 if (standard_run == "yes" & min_mz > 68 & min_mz < 71 & max_mz > 599 & max_mz < 610) {
-  # skip generating HMDB parts
-  hmdb_parts_path <- cmd_args[3] 
+  # skip generating HMDB parts; copy them from hmdb_parts_path
+  hmdb_parts_path <- cmd_args[4] 
   # find all files containing hmdb in file name
   hmdb_parts <- list.files(hmdb_parts_path, pattern = "hmdb") 
   for (hmdb_file in hmdb_parts) {
@@ -25,8 +23,24 @@ if (standard_run == "yes" & min_mz > 68 & min_mz < 71 & max_mz > 599 & max_mz <
 } else { 
   # generate HMDB parts in case of non-standard mz range
   load(db_file)
-  ppm <- as.numeric(cmd_args[5])
 
+  # determine segments of m/z for HMDB parts; smaller parts for m/z < 100
+  mz_segments <- c()
+  segment_start <- min_mz
+  segment_end <- min_mz + 5
+  while (segment_end < max_mz) {
+    if (segment_start < 100) {
+      segment_size = 5
+    } else {
+      segment_size = 10
+    }
+    mz_segments <- c(mz_segments, segment_start)
+    segment_start <- segment_start + segment_size
+    segment_end <- segment_end + segment_size
+  }
+  #last segment
+  mz_segments <- c(mz_segments, max_mz)
+
   scanmodes <- c("positive", "negative")
   for (scanmode in scanmodes) {
     if (scanmode == "negative") {
@@ -38,73 +52,20 @@ if (standard_run == "yes" & min_mz > 68 & min_mz < 71 & max_mz > 599 & max_mz <
     }
 
     # filter mass range meassured
-    hmdb_add_iso = hmdb_add_iso[which(hmdb_add_iso[ , column_label] >= breaks_fwhm[1] &
-                                      hmdb_add_iso[ , column_label] <= breaks_fwhm[length(breaks_fwhm)]), ]
+    hmdb_add_iso = hmdb_add_iso[which(hmdb_add_iso[ , column_label] >= min_mz &
+                                      hmdb_add_iso[ , column_label] <= max_mz), ]
 
     # sort on mass
-    outlist <- hmdb_add_iso[order(as.numeric(hmdb_add_iso[ , column_label])),]
-    nr_rows <- dim(outlist)[1]
-
-    # maximum number of rows per file
-    sub <- 20000 
-    end <- 0
-    last_line <- sub
-    check <- 0
-    outlist_part <- NULL
+    sorted_hmdb_add_iso <- hmdb_add_iso[order(as.numeric(hmdb_add_iso[ , column_label])),]
+    nr_rows <- dim(sorted_hmdb_add_iso)[1]
 
     # create parts and save to file
-    if (nr_rows < sub) {
-      outlist_part <- outlist
-      save(outlist_part, file = paste0(scanmode, "_hmdb.1.RData"))
-    } else if (nr_rows >= sub & (floor(nr_rows / sub) - 1) >= 2) {
-      for (i in 2:floor(nr_rows / sub) - 1) {
-        start <- -(sub - 1) + i * sub
-        end <- i * sub
-
-        if (i > 1){
-          outlist_i = outlist[c(start:end),]
-          nr_moved = 0
-          # Use ppm to replace border to avoid cut within peak group
-          while ((as.numeric(outlist_i[1, column_label]) - as.numeric(outlist_part[last_line, column_label])) * 1e+06 / 
-                  as.numeric(outlist_i[1, column_label]) < ppm) {
-            outlist_part <- rbind(outlist_part, outlist_i[1, ])
-            outlist_i <- outlist_i[-1, ]
-            nr_moved <- nr_moved + 1
-          }
-
-          save(outlist_part, file = paste(scanmode, "_", paste("hmdb", i-1, "RData", sep = "."), sep = ""))
-          check <- check + dim(outlist_part)[1]
-
-          outlist_part <- outlist_i
-          last_line <- dim(outlist_part)[1]
-
-        } else {
-          outlist_part <- outlist[c(start:end),]
-        }
-      }
-
-      start <- end + 1
-      end <- nr_rows
-      outlist_i <- outlist[c(start:end), ]
-      nr_moved <- 0
-
-      if (!is.null(outlist_part)) {
-        # Calculate ppm and replace border, avoid cut within peak group
-        while ((as.numeric(outlist_i[1, column_label]) - as.numeric(outlist_part[last_line, column_label])) * 1e+06 / 
-                as.numeric(outlist_i[1, column_label]) < ppm) {
-          outlist_part <- rbind(outlist_part, outlist_i[1, ])
-          outlist_i <- outlist_i[-1, ]
-          nr_moved <- nr_moved + 1
-        }
-
-        save(outlist_part, file = paste0(scanmode, "_hmdb.", i, ".RData"))
-        check <- check + dim(outlist_part)[1]
-      }
-
-      outlist_part <- outlist_i
-      save(outlist_part, file = paste0(scanmode, "_hmdb.", i + 1, ".RData"))
-      check <- check + dim(outlist_part)[1]
+    for (mz_part_index in 1:(length(mz_segments) - 1)) {
+      mz_start <- mz_segments[mz_part_index]
+      mz_end <- mz_segments[mz_part_index + 1]
+      outlist_part <- sorted_hmdb_add_iso[sorted_hmdb_add_iso[ , column_label] > mz_start &
+                                          sorted_hmdb_add_iso[ , column_label] <= mz_end, ]
+      save(outlist_part, file = paste0(scanmode, "_hmdb.", mz_part_index, ".RData"))
     }
   }
 }
-
diff --git a/DIMS/HMDBparts.nf b/DIMS/HMDBparts.nf
@@ -13,6 +13,6 @@ process HMDBparts {
 
     script:
         """
-        Rscript ${baseDir}/CustomModules/DIMS/HMDBparts.R $hmdb_db_file $breaks_file $params.hmdb_parts_files $params.standard_run $params.ppm
+        Rscript ${baseDir}/CustomModules/DIMS/HMDBparts.R $hmdb_db_file $breaks_file $params.standard_run $params.hmdb_parts_files
         """
 }
-Original file line number
+Diff line change
@@ Expand Up / @@ -13,6 +13,6 @@ process HMDBparts { @@
         script:
             """
-            Rscript ${baseDir}/CustomModules/DIMS/HMDBparts.R $hmdb_db_file $breaks_file $params.hmdb_parts_files $params.standard_run $params.ppm
+            Rscript ${baseDir}/CustomModules/DIMS/HMDBparts.R $hmdb_db_file $breaks_file $params.standard_run $params.hmdb_parts_files
             """
     }