From e7c4377f9cba4a953d8a62ea866a92425b6fda66 Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Wed, 23 Jun 2021 22:18:29 -0700 Subject: [PATCH] Set H5Pset_alignment & H5Pset_sieve_buf_size Especially H5Pset_alignment is pretty important for MPI-parallel performance. --- src/IO/HDF5/ParallelHDF5IOHandler.cpp | 44 +++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/src/IO/HDF5/ParallelHDF5IOHandler.cpp b/src/IO/HDF5/ParallelHDF5IOHandler.cpp index a0cdb0484f..7503be4202 100644 --- a/src/IO/HDF5/ParallelHDF5IOHandler.cpp +++ b/src/IO/HDF5/ParallelHDF5IOHandler.cpp @@ -96,6 +96,50 @@ ParallelHDF5IOHandlerImpl::ParallelHDF5IOHandlerImpl( status = H5Pset_cache(m_fileAccessProperty, metaCacheElements, rawCacheElements, rawCacheSize, policy); VERIFY(status >= 0, "[HDF5] Internal error: Failed to set H5Pset_cache"); + /* File alignment - important for parallel I/O + * + * Sets the alignment properties of a file access property list so that any + * file object greater than or equal in size to threshold bytes will be + * aligned on an address which is a multiple of alignment. The addresses + * are relative to the end of the user block; the alignment is calculated + * by subtracting the user block size from the absolute file address and + * then adjusting the address to be a multiple of alignment. + * + * Default values for threshold and alignment are one, implying no + * alignment. Generally the default values will result in the best + * performance for single-process access to the file. For MPI IO and other + * parallel systems, choose an alignment which is a multiple of the disk + * block size. + * + * IN: Threshold value. Note that setting the threshold value to 0 (zero) + * has the effect of a special case, forcing everything to be aligned. + * IN: Alignment value. + * + * Good numbers are roughly FS blocksize or transfer block sizes determined + * by the filesystem, network and routers at play. + */ + hsize_t threshold = bytes / 2u; + hsize_t alignment = bytes; + H5Pset_alignment( m_fileAccessProperty, threshold, alignment ); + + /* maximum size in bytes of the data sieve buffer, which is used by file + * drivers that are capable of using data sieving. The data sieve buffer + * is used when performing I/O on datasets in the file. Using a buffer + * which is large enough to hold several pieces of the dataset being read + * in for hyperslab selections boosts performance by quite a bit. + * + * The default value is set to 64KB, indicating that file I/O for raw data + * reads and writes will occur in at least 64KB blocks. Setting the value + * to 0 with this API function will turn off the data sieving, even if the + * VFL driver attempts to use that strategy. + * + * Internally, the library checks the storage sizes of the datasets in the + * file. It picks the smaller one between the size from the file access + * property and the size of the dataset to allocate the sieve buffer for + * the dataset in order to save memory usage. + */ + H5Pset_sieve_buf_size( m_fileAccessProperty, bytes ); /* >=FS Blocksize*/ + // sets the maximum size for the type conversion buffer and background // buffer and optionally supplies pointers to application-allocated // buffers. If the buffer size is smaller than the entire amount of data