Closed
Description
Describe the bug, including details regarding any error messages, version, and platform.
A FileDecryptionProperties
is been used after it has been freed in two places in ParquetFileFormat
.
- A
FileDecryptionProperties
is been deconstructed when leaving methodParquetFileFormat::GetReaderAsync
(threadT440
), which is later (threadT74
) used inside the async lambda (properties
):
arrow/cpp/src/arrow/dataset/file_parquet.cc
Lines 525 to 532 in b907c5d
==203464==ERROR: AddressSanitizer: heap-use-after-free on address 0x5030007d5548 at pc 0x58c146411ce2 bp 0x7de5705fc610 sp 0x7de5705fc600
WRITE of size 4 at 0x5030007d5548 thread T74
#0 0x58c146411ce1 in __gnu_cxx::__atomic_add(int volatile*, int) /usr/include/c++/13/ext/atomicity.h:71
#1 0x58c146411ce1 in __gnu_cxx::__atomic_add_dispatch(int*, int) /usr/include/c++/13/ext/atomicity.h:111
#2 0x58c146411ce1 in std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_add_ref_copy() /usr/include/c++/13/bits/shared_ptr_base.h:152
#3 0x58c14640bce6 in std::__shared_count<(__gnu_cxx::_Lock_policy)2>::__shared_count(std::__shared_count<(__gnu_cxx::_Lock_policy)2> const&) /usr/include/c++/13/bits/shared_ptr_base.h:1078
#4 0x58c1464d3c64 in std::__shared_ptr<parquet::FileDecryptionProperties, (__gnu_cxx::_Lock_policy)2>::__shared_ptr(std::__shared_ptr<parquet::FileDecryptionProperties, (__gnu_cxx::_Lock_policy)2> const&) (/home/enrico/Work/git/arrow/cpp/cmake-build-debug/debug/arrow-dataset-file-parquet-encryption-test+0x1cec64) (BuildId: 57601f70a2e6f76ad5b425c6cf08eaa169b839ea)
#5 0x58c1464d3c8e in std::shared_ptr<parquet::FileDecryptionProperties>::shared_ptr(std::shared_ptr<parquet::FileDecryptionProperties> const&) (/home/enrico/Work/git/arrow/cpp/cmake-build-debug/debug/arrow-dataset-file-parquet-encryption-test+0x1cec8e) (BuildId: 57601f70a2e6f76ad5b425c6cf08eaa169b839ea)
#6 0x7de5e90aa676 in parquet::ReaderProperties::ReaderProperties(parquet::ReaderProperties const&) /home/enrico/Work/git/arrow/cpp/src/parquet/properties.h:59
#7 0x7de5e80e3790 in std::__detail::_MakeUniq<parquet::SerializedRowGroup>::__single_object std::make_unique<parquet::SerializedRowGroup, std::shared_ptr<arrow::io::RandomAccessFile>&, std::shared_ptr<arrow::io::internal::ReadRangeCache>&, long&, parquet::FileMetaData*, int&, parquet::ReaderProperties&, std::shared_ptr<arrow::Buffer> >(std::shared_ptr<arrow::io::RandomAccessFile>&, std::shared_ptr<arrow::io::internal::ReadRangeCache>&, long&, parquet::FileMetaData*&&, int&, parquet::ReaderProperties&, std::shared_ptr<arrow::Buffer>&&) /usr/include/c++/13/bits/unique_ptr.h:1070
#8 0x7de5e80dbf1d in parquet::SerializedFile::GetRowGroup(int) /home/enrico/Work/git/arrow/cpp/src/parquet/file_reader.cc:327
#9 0x7de5e80d5bb3 in parquet::ParquetFileReader::RowGroup(int) /home/enrico/Work/git/arrow/cpp/src/parquet/file_reader.cc:889
#10 0x7de5e7b805bf in parquet::arrow::FileColumnIterator::NextChunk() (/home/enrico/Work/git/arrow/cpp/cmake-build-debug/debug/libparquet.so.1900+0x3805bf) (BuildId: d5a62f9c6ed58a37b6da81c34f141d1f0a952dd0)
0x5030007d5548 is located 8 bytes inside of 24-byte region [0x5030007d5540,0x5030007d5558)
freed by thread T440 here:
#0 0x7de5e94ff5e8 in operator delete(void*, unsigned long) ../../../../src/libsanitizer/asan/asan_new_delete.cpp:164
#1 0x7de5e80bc176 in std::_Sp_counted_ptr<parquet::FileDecryptionProperties*, (__gnu_cxx::_Lock_policy)2>::~_Sp_counted_ptr() /usr/include/c++/13/bits/shared_ptr_base.h:419
#2 0x7de5e80bc7d2 in std::_Sp_counted_ptr<parquet::FileDecryptionProperties*, (__gnu_cxx::_Lock_policy)2>::_M_destroy() /usr/include/c++/13/bits/shared_ptr_base.h:432
#3 0x58c1464063db in std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() /usr/include/c++/13/bits/shared_ptr_base.h:347
#4 0x58c14640bc63 in std::__shared_count<(__gnu_cxx::_Lock_policy)2>::~__shared_count() /usr/include/c++/13/bits/shared_ptr_base.h:1071
#5 0x58c1464ca4ab in std::__shared_ptr<parquet::FileDecryptionProperties, (__gnu_cxx::_Lock_policy)2>::~__shared_ptr() /usr/include/c++/13/bits/shared_ptr_base.h:1524
#6 0x58c1464ca4cb in std::shared_ptr<parquet::FileDecryptionProperties>::~shared_ptr() /usr/include/c++/13/bits/shared_ptr.h:175
#7 0x58c1464ca591 in parquet::ReaderProperties::~ReaderProperties() /home/enrico/Work/git/arrow/cpp/src/parquet/properties.h:59
#8 0x7de5e9087118 in arrow::dataset::ParquetFileFormat::GetReaderAsync(arrow::dataset::FileSource const&, std::shared_ptr<arrow::dataset::ScanOptions> const&, std::shared_ptr<parquet::FileMetaData> const&) const /home/enrico/Work/git/arrow/cpp/src/arrow/dataset/file_parquet.cc:558
#9 0x7de5e90893c0 in arrow::dataset::ParquetFileFormat::ScanBatchesAsync(std::shared_ptr<arrow::dataset::ScanOptions> const&, std::shared_ptr<arrow::dataset::FileFragment> const&) const /home/enrico/Work/git/arrow/cpp/src/arrow/dataset/file_parquet.cc:652
#10 0x7de5e8de0554 in arrow::dataset::FileFragment::ScanBatchesAsync(std::shared_ptr<arrow::dataset::ScanOptions> const&) /home/enrico/Work/git/arrow/cpp/src/arrow/dataset/file_base.cc:188
#11 0x7de5e8ee5049 in FragmentToBatches /home/enrico/Work/git/arrow/cpp/src/arrow/dataset/scanner.cc:307
previously allocated by thread T440 here:
#0 0x7de5e94fe548 in operator new(unsigned long) ../../../../src/libsanitizer/asan/asan_new_delete.cpp:95
#1 0x7de5e80b701a in std::__shared_count<(__gnu_cxx::_Lock_policy)2>::__shared_count<parquet::FileDecryptionProperties*>(parquet::FileDecryptionProperties*) /usr/include/c++/13/bits/shared_ptr_base.h:917
#2 0x7de5e80b1d08 in std::__shared_count<(__gnu_cxx::_Lock_policy)2>::__shared_count<parquet::FileDecryptionProperties*>(parquet::FileDecryptionProperties*, std::integral_constant<bool, false>) /usr/include/c++/13/bits/shared_ptr_base.h:928
#3 0x7de5e80ae0ec in std::__shared_ptr<parquet::FileDecryptionProperties, (__gnu_cxx::_Lock_policy)2>::__shared_ptr<parquet::FileDecryptionProperties, void>(parquet::FileDecryptionProperties*) (/home/enrico/Work/git/arrow/cpp/cmake-build-debug/debug/libparquet.so.1900+0x8ae0ec) (BuildId: d5a62f9c6ed58a37b6da81c34f141d1f0a952dd0)
#4 0x7de5e80abbbc in std::shared_ptr<parquet::FileDecryptionProperties>::shared_ptr<parquet::FileDecryptionProperties, void>(parquet::FileDecryptionProperties*) /usr/include/c++/13/bits/shared_ptr.h:214
#5 0x7de5e835bdc3 in parquet::FileDecryptionProperties::Builder::build() /home/enrico/Work/git/arrow/cpp/src/parquet/encryption/encryption.h:328
#6 0x7de5e835b813 in parquet::encryption::CryptoFactory::GetFileDecryptionProperties(parquet::encryption::KmsConnectionConfig const&, parquet::encryption::DecryptionConfiguration const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::shared_ptr<arrow::fs::FileSystem> const&) /home/enrico/Work/git/arrow/cpp/src/parquet/encryption/crypto_factory.cc:180
#7 0x7de5e907c44f in MakeReaderProperties /home/enrico/Work/git/arrow/cpp/src/arrow/dataset/file_parquet.cc:86
#8 0x7de5e9086d93 in arrow::dataset::ParquetFileFormat::GetReaderAsync(arrow::dataset::FileSource const&, std::shared_ptr<arrow::dataset::ScanOptions> const&, std::shared_ptr<parquet::FileMetaData> const&) const /home/enrico/Work/git/arrow/cpp/src/arrow/dataset/file_parquet.cc:526
#9 0x7de5e90893c0 in arrow::dataset::ParquetFileFormat::ScanBatchesAsync(std::shared_ptr<arrow::dataset::ScanOptions> const&, std::shared_ptr<arrow::dataset::FileFragment> const&) const /home/enrico/Work/git/arrow/cpp/src/arrow/dataset/file_parquet.cc:652
#10 0x7de5e8de0554 in arrow::dataset::FileFragment::ScanBatchesAsync(std::shared_ptr<arrow::dataset::ScanOptions> const&) /home/enrico/Work/git/arrow/cpp/src/arrow/dataset/file_base.cc:188
#11 0x7de5e8ee5049 in FragmentToBatches /home/enrico/Work/git/arrow/cpp/src/arrow/dataset/scanner.cc:307
- In
MakeReaderProperties
, theparquet_scan_options
given to the method is being modified, which is suspicious given the modification relies on other arguments to the method (likepath
andfilesystem
). Concurrent threads overwrite each-others instances:
arrow/cpp/src/arrow/dataset/file_parquet.cc
Lines 66 to 95 in b907c5d
By modifying parquet_scan_options->reader_properties->file_decryption_properties
, the current value that has been assigned by a different thread (T235
) is being deconstructed. It looks like assignment of std::shared_ptr
is not thread-safe. When the other thread access that instance, a segmentation fault occurs when incrementing the reference counter:
==414573==ERROR: AddressSanitizer: heap-use-after-free on address 0x503000e06b08 at pc 0x767802313d7c bp 0x7676929f8ca0 sp 0x7676929f8c90
WRITE of size 4 at 0x503000e06b08 thread T235
#0 0x767802313d7b in __gnu_cxx::__atomic_add(int volatile*, int) /usr/include/c++/13/ext/atomicity.h:71
#1 0x767802313d7b in __gnu_cxx::__atomic_add_dispatch(int*, int) /usr/include/c++/13/ext/atomicity.h:111
#2 0x767802313d7b in std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_add_ref_copy() /usr/include/c++/13/bits/shared_ptr_base.h:152
#3 0x767802310a78 in std::__shared_count<(__gnu_cxx::_Lock_policy)2>::__shared_count(std::__shared_count<(__gnu_cxx::_Lock_policy)2> const&) /usr/include/c++/13/bits/shared_ptr_base.h:1078
#4 0x7678025f902e in std::__shared_ptr<parquet::FileDecryptionProperties, (__gnu_cxx::_Lock_policy)2>::__shared_ptr(std::__shared_ptr<parquet::FileDecryptionProperties, (__gnu_cxx::_Lock_policy)2> const&) /usr/include/c++/13/bits/shared_ptr_base.h:1522
#5 0x7678025f9058 in std::shared_ptr<parquet::FileDecryptionProperties>::shared_ptr(std::shared_ptr<parquet::FileDecryptionProperties> const&) /usr/include/c++/13/bits/shared_ptr.h:204
#6 0x7678025f93a4 in parquet::ReaderProperties::ReaderProperties(parquet::ReaderProperties const&) /home/enrico/Work/git/arrow/cpp/src/parquet/properties.h:59
#7 0x7678028db77d in parquet::SerializedFile::SerializedFile(std::shared_ptr<arrow::io::RandomAccessFile>, parquet::ReaderProperties const&) /home/enrico/Work/git/arrow/cpp/src/parquet/file_reader.cc:303
#8 0x7678028d3416 in parquet::ParquetFileReader::Contents::OpenAsync(std::shared_ptr<arrow::io::RandomAccessFile>, parquet::ReaderProperties const&, std::shared_ptr<parquet::FileMetaData>) /home/enrico/Work/git/arrow/cpp/src/parquet/file_reader.cc:793
#9 0x7678028d4edc in parquet::ParquetFileReader::OpenAsync(std::shared_ptr<arrow::io::RandomAccessFile>, parquet::ReaderProperties const&, std::shared_ptr<parquet::FileMetaData>) /home/enrico/Work/git/arrow/cpp/src/parquet/file_reader.cc:842
#10 0x767803886510 in operator() /home/enrico/Work/git/arrow/cpp/src/arrow/dataset/file_parquet.cc:531
#11 0x7678038a24d4 in operator()<arrow::dataset::ParquetFileFormat::GetReaderAsync(const arrow::dataset::FileSource&, const std::shared_ptr<arrow::dataset::ScanOptions>&, const std::shared_ptr<parquet::FileMetaData>&) const::<lambda(const std::shared_ptr<arrow::io::RandomAccessFile>&)>, const std::shared_ptr<arrow::io::RandomAccessFile>&> /home/enrico/Work/git/arrow/cpp/src/arrow/util/future.h:178
previously allocated by thread T235 here:
#0 0x767803cfe548 in operator new(unsigned long) ../../../../src/libsanitizer/asan/asan_new_delete.cpp:95
#1 0x7678028b701a in std::__shared_count<(__gnu_cxx::_Lock_policy)2>::__shared_count<parquet::FileDecryptionProperties*>(parquet::FileDecryptionProperties*) /usr/include/c++/13/bits/shared_ptr_base.h:917
#2 0x7678028b1d08 in std::__shared_count<(__gnu_cxx::_Lock_policy)2>::__shared_count<parquet::FileDecryptionProperties*>(parquet::FileDecryptionProperties*, std::integral_constant<bool, false>) /usr/include/c++/13/bits/shared_ptr_base.h:928
#3 0x7678028ae0ec in std::__shared_ptr<parquet::FileDecryptionProperties, (__gnu_cxx::_Lock_policy)2>::__shared_ptr<parquet::FileDecryptionProperties, void>(parquet::FileDecryptionProperties*) (/home/enrico/Work/git/arrow/cpp/cmake-build-debug/debug/libparquet.so.1900+0x8ae0ec) (BuildId: d5a62f9c6ed58a37b6da81c34f141d1f0a952dd0)
#4 0x7678028abbbc in std::shared_ptr<parquet::FileDecryptionProperties>::shared_ptr<parquet::FileDecryptionProperties, void>(parquet::FileDecryptionProperties*) /usr/include/c++/13/bits/shared_ptr.h:214
#5 0x767802b5bdc3 in parquet::FileDecryptionProperties::Builder::build() /home/enrico/Work/git/arrow/cpp/src/parquet/encryption/encryption.h:328
#6 0x767802b5b813 in parquet::encryption::CryptoFactory::GetFileDecryptionProperties(parquet::encryption::KmsConnectionConfig const&, parquet::encryption::DecryptionConfiguration const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::shared_ptr<arrow::fs::FileSystem> const&) /home/enrico/Work/git/arrow/cpp/src/parquet/encryption/crypto_factory.cc:180
#7 0x76780387c42f in MakeReaderProperties /home/enrico/Work/git/arrow/cpp/src/arrow/dataset/file_parquet.cc:86
#8 0x767803886d69 in arrow::dataset::ParquetFileFormat::GetReaderAsync(arrow::dataset::FileSource const&, std::shared_ptr<arrow::dataset::ScanOptions> const&, std::shared_ptr<parquet::FileMetaData> const&) const /home/enrico/Work/git/arrow/cpp/src/arrow/dataset/file_parquet.cc:526
0x503000e06b08 is located 8 bytes inside of 24-byte region [0x503000e06b00,0x503000e06b18)
freed by thread T481 here:
#0 0x767803cff5e8 in operator delete(void*, unsigned long) ../../../../src/libsanitizer/asan/asan_new_delete.cpp:164
#1 0x7678028bc176 in std::_Sp_counted_ptr<parquet::FileDecryptionProperties*, (__gnu_cxx::_Lock_policy)2>::~_Sp_counted_ptr() /usr/include/c++/13/bits/shared_ptr_base.h:419
#2 0x7678028bc7d2 in std::_Sp_counted_ptr<parquet::FileDecryptionProperties*, (__gnu_cxx::_Lock_policy)2>::_M_destroy() /usr/include/c++/13/bits/shared_ptr_base.h:432
#3 0x5b649c444bd0 in std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release_last_use() (/home/enrico/Work/git/arrow/cpp/cmake-build-debug/debug/arrow-dataset-file-parquet-encryption-test+0x10cbd0) (BuildId: 57601f70a2e6f76ad5b425c6cf08eaa169b839ea)
#4 0x5b649c43eb43 in std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release_last_use_cold() /usr/include/c++/13/bits/shared_ptr_base.h:199
#5 0x5b649c43950d in std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() /usr/include/c++/13/bits/shared_ptr_base.h:353
#6 0x5b649c43ec63 in std::__shared_count<(__gnu_cxx::_Lock_policy)2>::~__shared_count() /usr/include/c++/13/bits/shared_ptr_base.h:1071
#7 0x5b649c4fd4ab in std::__shared_ptr<parquet::FileDecryptionProperties, (__gnu_cxx::_Lock_policy)2>::~__shared_ptr() /usr/include/c++/13/bits/shared_ptr_base.h:1524
#8 0x5b649c510151 in std::__shared_ptr<parquet::FileDecryptionProperties, (__gnu_cxx::_Lock_policy)2>::operator=(std::__shared_ptr<parquet::FileDecryptionProperties, (__gnu_cxx::_Lock_policy)2>&&) /usr/include/c++/13/bits/shared_ptr_base.h:1620
#9 0x5b649c509a77 in std::shared_ptr<parquet::FileDecryptionProperties>::operator=(std::shared_ptr<parquet::FileDecryptionProperties>&&) /usr/include/c++/13/bits/shared_ptr.h:440
#10 0x5b649c4fd505 in parquet::ReaderProperties::file_decryption_properties(std::shared_ptr<parquet::FileDecryptionProperties>) /home/enrico/Work/git/arrow/cpp/src/parquet/properties.h:111
#11 0x76780387c472 in MakeReaderProperties /home/enrico/Work/git/arrow/cpp/src/arrow/dataset/file_parquet.cc:88
#12 0x767803886d69 in arrow::dataset::ParquetFileFormat::GetReaderAsync(arrow::dataset::FileSource const&, std::shared_ptr<arrow::dataset::ScanOptions> const&, std::shared_ptr<parquet::FileMetaData> const&) const /home/enrico/Work/git/arrow/cpp/src/arrow/dataset/file_parquet.cc:526
Component(s)
C++