Skip to content

Commit

Permalink
Merge sort memory requirement diagnostic (PR samtools#581)
Browse files Browse the repository at this point in the history
(Unindented the string text to improve 80-column legibility of the
source code.)
  • Loading branch information
jmarshall committed May 20, 2016
2 parents 4db01dd + 4dbadcf commit b2b81b4
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 1 deletion.
4 changes: 4 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ Noteworthy changes in samtools:
sites with zero depth; repeating the option as -aa or -a -a additionally
shows reference sequences without any reads mapped to them (#496).

* To stop it from creating too many temporary files, samtools sort
now refuses to run unless its per-thread memory limit (-m) is set to
at least 1 megabyte (#547).


Beta Release 1.3.1 (22 April 2016)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down
35 changes: 34 additions & 1 deletion bam_sort.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,16 @@ DEALINGS IN THE SOFTWARE. */
#include "htslib/sam.h"
#include "sam_opts.h"

/* Minimum per-thread memory, in megabytes, that sort requires before it will
   attempt to run.  bam_sort() compares the -m setting (in bytes) against this
   value shifted left by 20 and exits with a failure status if it is smaller.
   This is to prevent accidents where failing to use the -m option correctly
   results in the creation of a temporary file for each read in the input file.
   Don't forget to update the man page if you change this. */
const size_t SORT_MIN_MEGS_PER_THREAD = 1;

/* Default per-thread memory for sort, in megabytes; bam_sort() converts it to
   bytes (<< 20) to initialise its memory limit.
   Must be >= SORT_MIN_MEGS_PER_THREAD.
   Don't forget to update the man page if you change this. */
const size_t SORT_DEFAULT_MEGS_PER_THREAD = 768;

#if !defined(__DARWIN_C_LEVEL) || __DARWIN_C_LEVEL < 900000L
#define NEED_MEMSET_PATTERN4
#endif
Expand Down Expand Up @@ -1857,9 +1867,26 @@ static void sort_usage(FILE *fp)
sam_global_opt_help(fp, "-.O..");
}

/* Tell the user, on stderr, that the -m value given to sort is below the
   permitted minimum and explain how the option should be written.

   max_mem is the rejected setting in bytes.  For display it is divided by
   1024 (at most twice) for as long as it exceeds 9K, and the matching "K" or
   "M" suffix is printed after the scaled number. */
static void complain_about_memory_setting(size_t max_mem) {
    static const char *const unit_suffix[] = { "", "K", "M" };
    const size_t scale_threshold = 9 << 10;
    size_t shown = max_mem;
    int unit = 0;

    /* Step up one unit at a time; stop at megabytes. */
    while (unit < 2 && shown > scale_threshold) {
        shown >>= 10;
        unit++;
    }

    fprintf(stderr,
"[bam_sort] -m setting (%zu%s bytes) is less than the minimum required (%zuM).\n\n"
"Trying to run with -m too small can lead to the creation of a very large number\n"
"of temporary files. This may make sort fail due to it exceeding limits on the\n"
"number of files it can have open at the same time.\n\n"
"Please check your -m parameter. It should be an integer followed by one of the\n"
"letters K (for kilobytes), M (megabytes) or G (gigabytes). You should ensure it\n"
"is at least the minimum above, and much higher if you are sorting a large file.\n",
            shown, unit_suffix[unit], SORT_MIN_MEGS_PER_THREAD);
}

int bam_sort(int argc, char *argv[])
{
size_t max_mem = 768<<20; // 512MB
size_t max_mem = SORT_DEFAULT_MEGS_PER_THREAD << 20;
int c, nargs, is_by_qname = 0, ret, o_seen = 0, n_threads = 0, level = -1;
char *fnout = "-", modeout[12];
kstring_t tmpprefix = { 0, 0, NULL };
Expand Down Expand Up @@ -1910,6 +1937,12 @@ int bam_sort(int argc, char *argv[])
goto sort_end;
}

if (max_mem < (SORT_MIN_MEGS_PER_THREAD << 20)) {
complain_about_memory_setting(max_mem);
ret = EXIT_FAILURE;
goto sort_end;
}

strcpy(modeout, "wb");
sam_open_mode(modeout+1, fnout, NULL);
if (level >= 0) sprintf(strchr(modeout, '\0'), "%d", level < 9? level : 9);
Expand Down
3 changes: 3 additions & 0 deletions samtools.1
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,9 @@ or with a
.BR K ", " M ", or " G
suffix.
[768 MiB]
.IP
To prevent sort from creating a huge number of temporary files, it enforces a
minimum value of 1M for this setting.
.TP
.B -n
Sort by read names (i.e., the
Expand Down

0 comments on commit b2b81b4

Please sign in to comment.