Skip to content

Commit

Permalink
Use fcntl(F_FULLFSYNC) on OS X (facebook#9356)
Browse files Browse the repository at this point in the history
Summary:
Closing facebook#5954

fsync/fdatasync on Linux:
```
(fsync/fdatasync) includes writing through or flushing a disk cache if present.
```

However, on OS X and iOS:
```
(fsync) will flush all data from the host to the drive (i.e. the "permanent storage device"),
the drive itself may not physically write the data to the platters for quite some time and it
may be written in an out-of-order sequence.
```

Solution is to use `fcntl(F_FULLFSYNC)` on OS X so that we get the same
persistence guarantee.

According to OSX man page,
```
The F_FULLFSYNC fcntl asks the drive to flush **all** buffered data to permanent storage.
```
This suggests that it will be no faster than `fsync` on Linux, since Linux, according to its man page,
```
writing through or flushing a disk cache if present
```
It means Linux may not flush **all** data from disk cache.

This is similar to bug reports/fixes in:
- golang: golang/go#26650
- leveldb: google/leveldb@296de8d.

Not sure if we should fall back to fsync, since doing so would break the persistence contract.

Pull Request resolved: facebook#9356

Reviewed By: jay-zhuang

Differential Revision: D33417416

Pulled By: riversand963

fbshipit-source-id: 475548ff9c5eaccde325e0f6842694271cbc8cb7
  • Loading branch information
riversand963 authored and facebook-github-bot committed Jan 19, 2022
1 parent 5576ded commit 1a8e9f0
Show file tree
Hide file tree
Showing 4 changed files with 65 additions and 2 deletions.
5 changes: 5 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -616,6 +616,11 @@ if(HAVE_AUXV_GETAUXVAL)
add_definitions(-DROCKSDB_AUXV_GETAUXVAL_PRESENT)
endif()

# Probe <fcntl.h> for the F_FULLFSYNC symbol. When it is present, add
# -DHAVE_FULLFSYNC to the compile definitions so sources can select the
# fcntl(F_FULLFSYNC) code path with #ifdef HAVE_FULLFSYNC.
check_cxx_symbol_exists(F_FULLFSYNC "fcntl.h" HAVE_FULLFSYNC)
if(HAVE_FULLFSYNC)
add_definitions(-DHAVE_FULLFSYNC)
endif()

include_directories(${PROJECT_SOURCE_DIR})
include_directories(${PROJECT_SOURCE_DIR}/include)
if(WITH_FOLLY_DISTRIBUTED_MUTEX)
Expand Down
1 change: 1 addition & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

### Bug Fixes
* Fixed a bug where FlushMemTable may return OK even if the flush did not succeed.
* Fixed a bug of Sync() and Fsync() not using `fcntl(F_FULLFSYNC)` on OS X and iOS.

## 6.28.0 (2021-12-17)
### New Features
Expand Down
14 changes: 13 additions & 1 deletion build_tools/build_detect_platform
Original file line number Diff line number Diff line change
Expand Up @@ -842,7 +842,19 @@ EOF
fi
fi

rm -f test.o
# Check for F_FULLFSYNC: try to compile a tiny program that references the
# fcntl command, using the platform C++ flags. Compiler diagnostics are
# discarded; only the exit status matters. On success, -DHAVE_FULLFSYNC is
# appended to COMMON_FLAGS so sources can test it with #ifdef.
$CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null <<EOF
#include <fcntl.h>
int main() {
  fcntl(0, F_FULLFSYNC);
  return 0;
}
EOF
if [ "$?" = 0 ]; then
  COMMON_FLAGS="$COMMON_FLAGS -DHAVE_FULLFSYNC"
fi

rm -f test.o test_dl.o

PLATFORM_CCFLAGS="$PLATFORM_CCFLAGS $COMMON_FLAGS"
PLATFORM_CXXFLAGS="$PLATFORM_CXXFLAGS $COMMON_FLAGS"
Expand Down
47 changes: 46 additions & 1 deletion env/io_posix.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1108,9 +1108,15 @@ IOStatus PosixMmapFile::Flush(const IOOptions& /*opts*/,

// Flushes the file descriptor and then runs Msync().
//
// When HAVE_FULLFSYNC is defined the descriptor is flushed with
// fcntl(F_FULLFSYNC); otherwise fdatasync() is used. If the flush call
// reports failure, an IOError built from errno is returned and Msync() is
// not attempted. On success the result of Msync() is returned.
IOStatus PosixMmapFile::Sync(const IOOptions& /*opts*/,
                             IODebugContext* /*dbg*/) {
#ifdef HAVE_FULLFSYNC
  if (::fcntl(fd_, F_FULLFSYNC) < 0) {
    // The message names the fcntl command actually issued (F_FULLFSYNC).
    return IOError("while fcntl(F_FULLFSYNC) mmapped file", filename_, errno);
  }
#else   // HAVE_FULLFSYNC
  if (fdatasync(fd_) < 0) {
    return IOError("While fdatasync mmapped file", filename_, errno);
  }
#endif  // HAVE_FULLFSYNC

  return Msync();
}
Expand All @@ -1120,9 +1126,15 @@ IOStatus PosixMmapFile::Sync(const IOOptions& /*opts*/,
*/
IOStatus PosixMmapFile::Fsync(const IOOptions& /*opts*/,
                              IODebugContext* /*dbg*/) {
  // Flush the descriptor first: fcntl(F_FULLFSYNC) when HAVE_FULLFSYNC is
  // defined, fsync() otherwise. A failure is reported as an IOError built
  // from errno and Msync() is skipped.
#ifdef HAVE_FULLFSYNC
  if (::fcntl(fd_, F_FULLFSYNC) < 0) {
    // The message names the fcntl command actually issued (F_FULLFSYNC).
    return IOError("While fcntl(F_FULLFSYNC) on mmaped file", filename_, errno);
  }
#else   // HAVE_FULLFSYNC
  if (fsync(fd_) < 0) {
    return IOError("While fsync mmaped file", filename_, errno);
  }
#endif  // HAVE_FULLFSYNC

  // The descriptor flush succeeded; the final status comes from Msync().
  return Msync();
}
Expand Down Expand Up @@ -1320,17 +1332,29 @@ IOStatus PosixWritableFile::Flush(const IOOptions& /*opts*/,

// Flushes the file descriptor: fcntl(F_FULLFSYNC) when HAVE_FULLFSYNC is
// defined, fdatasync() otherwise. Returns IOStatus::OK() on success, or an
// IOError built from errno when the call reports failure.
IOStatus PosixWritableFile::Sync(const IOOptions& /*opts*/,
                                 IODebugContext* /*dbg*/) {
#ifdef HAVE_FULLFSYNC
  const int rc = ::fcntl(fd_, F_FULLFSYNC);
  const char* const context = "while fcntl(F_FULLFSYNC)";
#else   // HAVE_FULLFSYNC
  const int rc = fdatasync(fd_);
  const char* const context = "While fdatasync";
#endif  // HAVE_FULLFSYNC
  // errno is read immediately after the call; nothing in between touches it.
  return rc < 0 ? IOError(context, filename_, errno) : IOStatus::OK();
}

// Flushes the file descriptor: fcntl(F_FULLFSYNC) when HAVE_FULLFSYNC is
// defined, fsync() otherwise. Returns IOStatus::OK() on success, or an
// IOError built from errno when the call reports failure.
IOStatus PosixWritableFile::Fsync(const IOOptions& /*opts*/,
                                  IODebugContext* /*dbg*/) {
#ifdef HAVE_FULLFSYNC
  const int rc = ::fcntl(fd_, F_FULLFSYNC);
  const char* const context = "while fcntl(F_FULLFSYNC)";
#else   // HAVE_FULLFSYNC
  const int rc = fsync(fd_);
  const char* const context = "While fsync";
#endif  // HAVE_FULLFSYNC
  // errno is read immediately after the call; nothing in between touches it.
  return rc < 0 ? IOError(context, filename_, errno) : IOStatus::OK();
}

Expand Down Expand Up @@ -1503,17 +1527,29 @@ IOStatus PosixRandomRWFile::Flush(const IOOptions& /*opts*/,

// Flushes the file descriptor: fcntl(F_FULLFSYNC) when HAVE_FULLFSYNC is
// defined, fdatasync() otherwise. Returns IOStatus::OK() on success, or an
// IOError built from errno when the call reports failure.
IOStatus PosixRandomRWFile::Sync(const IOOptions& /*opts*/,
                                 IODebugContext* /*dbg*/) {
#ifdef HAVE_FULLFSYNC
  const int rc = ::fcntl(fd_, F_FULLFSYNC);
  const char* const context = "while fcntl(F_FULLFSYNC) random rw file";
#else   // HAVE_FULLFSYNC
  const int rc = fdatasync(fd_);
  const char* const context = "While fdatasync random read/write file";
#endif  // HAVE_FULLFSYNC
  // errno is read immediately after the call; nothing in between touches it.
  return rc < 0 ? IOError(context, filename_, errno) : IOStatus::OK();
}

// Flushes the file descriptor: fcntl(F_FULLFSYNC) when HAVE_FULLFSYNC is
// defined, fsync() otherwise. Returns IOStatus::OK() on success, or an
// IOError built from errno when the call reports failure.
IOStatus PosixRandomRWFile::Fsync(const IOOptions& /*opts*/,
                                  IODebugContext* /*dbg*/) {
#ifdef HAVE_FULLFSYNC
  if (::fcntl(fd_, F_FULLFSYNC) < 0) {
    // The message names the fcntl command actually issued (F_FULLFSYNC).
    return IOError("While fcntl(F_FULLFSYNC) random rw file", filename_, errno);
  }
#else   // HAVE_FULLFSYNC
  if (fsync(fd_) < 0) {
    return IOError("While fsync random read/write file", filename_, errno);
  }
#endif  // HAVE_FULLFSYNC
  return IOStatus::OK();
}

Expand Down Expand Up @@ -1585,10 +1621,19 @@ IOStatus PosixDirectory::FsyncWithDirOptions(
}
// fallback to dir-fsync for kDefault, kDirRenamed and kFileDeleted
}
#ifdef HAVE_FULLFSYNC
// btrfs is a Linux file system, while currently F_FULLFSYNC is available on
// Mac OS.
assert(!is_btrfs_);
if (::fcntl(fd_, F_FULLFSYNC) < 0) {
return IOError("while fcntl(F_FULLFSYNC)", "a directory", errno);
}
#else // HAVE_FULLFSYNC
if (fsync(fd_) == -1) {
s = IOError("While fsync", "a directory", errno);
}
#endif
#endif // HAVE_FULLFSYNC
#endif // OS_AIX
return s;
}
} // namespace ROCKSDB_NAMESPACE
Expand Down

0 comments on commit 1a8e9f0

Please sign in to comment.