Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cpp/src/arrow/buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,7 @@ Status AllocateResizableBuffer(MemoryPool* pool, const int64_t size,
#ifndef ARROW_NO_DEPRECATED_API

/// \brief Create Buffer referencing std::string memory
/// \deprecated Since 0.8.0
/// \note Deprecated since 0.8.0
///
/// Warning: string instance must stay alive
///
Expand Down
10 changes: 5 additions & 5 deletions cpp/src/arrow/compare.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,27 +33,27 @@ class Tensor;

#ifndef ARROW_NO_DEPRECATED_API
/// \brief Check whether two arrays are exactly equal
/// \param[in] left the first array to compare
/// \param[in] right the second array to compare
/// \param[out] are_equal presumably receives the comparison result, since the
/// function itself returns a Status -- confirm against the implementation
/// \note Deprecated since 0.8.0
Status ARROW_EXPORT ArrayEquals(const Array& left, const Array& right, bool* are_equal);

/// \brief Compare two tensors
/// \param[in] left the first tensor to compare
/// \param[in] right the second tensor to compare
/// \param[out] are_equal presumably receives the comparison result, since the
/// function itself returns a Status -- confirm against the implementation
/// \note Deprecated since 0.8.0
Status ARROW_EXPORT TensorEquals(const Tensor& left, const Tensor& right,
bool* are_equal);

/// \brief Check whether two arrays are approximately equal
///
/// For non-floating point types, this is equivalent to ArrayEquals(left, right)
/// \param[in] left the first array to compare
/// \param[in] right the second array to compare
/// \param[out] are_equal presumably receives the comparison result, since the
/// function itself returns a Status -- confirm against the implementation
/// \note Deprecated since 0.8.0
Status ARROW_EXPORT ArrayApproxEquals(const Array& left, const Array& right,
bool* are_equal);

/// \brief Check whether the indicated equal-length segments of two arrays are
/// exactly equal
/// \param[in] left the first array to compare
/// \param[in] right the second array to compare
/// \param[in] start_idx start of the segment, presumably an index into left
/// \param[in] end_idx end of the segment, presumably an index into left
/// (NOTE(review): inclusive vs. exclusive is not visible here -- confirm)
/// \param[in] other_start_idx presumably the start of the matching segment in
/// right -- confirm against the implementation
/// \param[out] are_equal presumably receives the comparison result, since the
/// function itself returns a Status -- confirm against the implementation
/// \note Deprecated since 0.8.0
Status ARROW_EXPORT ArrayRangeEquals(const Array& left, const Array& right,
int64_t start_idx, int64_t end_idx,
int64_t other_start_idx, bool* are_equal);

/// \brief Check whether the metadata of two types are exactly equal
/// \param[in] left the first type to compare
/// \param[in] right the second type to compare
/// \param[out] are_equal presumably receives the comparison result, since the
/// function itself returns a Status -- confirm against the implementation
/// \note Deprecated since 0.8.0
Status ARROW_EXPORT TypeEquals(const DataType& left, const DataType& right,
bool* are_equal);
#endif
Expand Down
66 changes: 55 additions & 11 deletions cpp/src/arrow/io/file.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,21 @@

#define _FILE_OFFSET_BITS 64

// Largest byte count handed to a single read/write call
#if defined(_MSC_VER) || defined(__APPLE__)
// MSVC builds and macOS share the same cap; on macOS the read/write max has to
// be set because of an OS bug
#define ARROW_MAX_IO_CHUNKSIZE INT32_MAX
#else
// everywhere else: see notes on Linux read/write manpage
#define ARROW_MAX_IO_CHUNKSIZE 0x7ffff000
#endif

#include "arrow/io/file.h"

#if _WIN32 || _WIN64
Expand Down Expand Up @@ -238,39 +253,68 @@ static inline Status FileSeek(int fd, int64_t pos) {
return Status::OK();
}

/// \brief Read up to nbytes from a file descriptor into buffer
///
/// With MSVC a single _read call is issued and requests larger than
/// ARROW_MAX_IO_CHUNKSIZE are rejected. Elsewhere the request is split into chunks
/// of at most ARROW_MAX_IO_CHUNKSIZE bytes. Reading stops early when a chunk comes
/// back short, which is treated as end-of-file. A read interrupted by a signal
/// (EINTR) is retried instead of being reported as a failure.
///
/// \param[in] fd file descriptor to read from
/// \param[out] buffer destination memory; must have room for nbytes bytes
/// \param[in] nbytes maximum number of bytes to read
/// \param[out] bytes_read number of bytes actually read, or -1 if a read failed
/// \return Status::OK on success; Status::IOError if a read failed or, with MSVC,
/// if nbytes exceeds ARROW_MAX_IO_CHUNKSIZE
static inline Status FileRead(const int fd, uint8_t* buffer, const int64_t nbytes,
                              int64_t* bytes_read) {
#if defined(_MSC_VER)
  if (nbytes > ARROW_MAX_IO_CHUNKSIZE) {
    return Status::IOError("Unable to read > 2GB blocks yet");
  }
  *bytes_read = static_cast<int64_t>(_read(fd, buffer, static_cast<uint32_t>(nbytes)));
#else
  *bytes_read = 0;

  while (*bytes_read < nbytes) {
    const int64_t chunksize =
        std::min(static_cast<int64_t>(ARROW_MAX_IO_CHUNKSIZE), nbytes - *bytes_read);
    const int64_t ret = static_cast<int64_t>(
        read(fd, buffer + *bytes_read, static_cast<size_t>(chunksize)));

    if (ret == -1) {
      if (errno == EINTR) {
        // Interrupted before any data was transferred: retry the same chunk
        continue;
      }
      // Keep the -1 sentinel so the shared error path below reports the failure
      *bytes_read = -1;
      break;
    }

    *bytes_read += ret;
    if (ret < chunksize) {
      // EOF
      break;
    }
  }
#endif

  if (*bytes_read == -1) {
    // Capture errno before building any strings, which could overwrite it
    const int errnum = errno;
    return Status::IOError(std::string("Error reading bytes from file: ") +
                           std::string(strerror(errnum)));
  }

  return Status::OK();
}

/// \brief Write nbytes from buffer to a file descriptor
///
/// With MSVC a single _write call is issued and requests larger than
/// ARROW_MAX_IO_CHUNKSIZE are rejected. Elsewhere the data is written in chunks of
/// at most ARROW_MAX_IO_CHUNKSIZE bytes until all nbytes have been written; partial
/// writes are continued from where they stopped. A write interrupted by a signal
/// (EINTR) is retried instead of being reported as a failure.
///
/// \param[in] fd file descriptor to write to
/// \param[in] buffer source memory holding at least nbytes bytes
/// \param[in] nbytes number of bytes to write
/// \return Status::OK on success; Status::IOError if a write failed or, with MSVC,
/// if nbytes exceeds ARROW_MAX_IO_CHUNKSIZE
static inline Status FileWrite(const int fd, const uint8_t* buffer,
                               const int64_t nbytes) {
  // Kept 64-bit so the value returned by write() is never narrowed
  int64_t ret = 0;
#if defined(_MSC_VER)
  if (nbytes > ARROW_MAX_IO_CHUNKSIZE) {
    return Status::IOError("Unable to write > 2GB blocks to file yet");
  }
  ret = static_cast<int64_t>(_write(fd, buffer, static_cast<uint32_t>(nbytes)));
#else
  int64_t bytes_written = 0;

  while (bytes_written < nbytes) {
    const int64_t chunksize =
        std::min(static_cast<int64_t>(ARROW_MAX_IO_CHUNKSIZE), nbytes - bytes_written);
    ret = static_cast<int64_t>(
        write(fd, buffer + bytes_written, static_cast<size_t>(chunksize)));

    if (ret == -1) {
      if (errno == EINTR) {
        // Interrupted before any data was transferred: retry the same chunk
        continue;
      }
      break;
    }
    bytes_written += ret;
  }
#endif

  if (ret == -1) {
    // Capture errno before building any strings, which could overwrite it
    const int errnum = errno;
    return Status::IOError(std::string("Error writing bytes to file: ") +
                           std::string(strerror(errnum)));
  }
  return Status::OK();
}
Expand Down
14 changes: 13 additions & 1 deletion python/pyarrow/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.

from pytest import skip
from pytest import skip, mark


groups = [
Expand Down Expand Up @@ -70,6 +70,18 @@ def pytest_addoption(parser):
default=False,
help=('Run only the {0} test group'.format(group)))

parser.addoption('--runslow', action='store_true',
default=False, help='run slow tests')


def pytest_collection_modifyitems(config, items):
    """Attach a skip marker to every collected item tagged ``slow``.

    Nothing is changed when the ``--runslow`` option is set.
    """
    if config.getoption('--runslow'):
        return

    slow_skip_marker = mark.skip(reason='need --runslow option to run')
    for test_item in items:
        if 'slow' in test_item.keywords:
            test_item.add_marker(slow_skip_marker)


def pytest_runtest_setup(item):
only_set = False
Expand Down
9 changes: 7 additions & 2 deletions python/pyarrow/tests/test_feather.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def tearDown(self):
pass

def test_file_not_exist(self):
    """Opening a reader on a path that does not exist raises ArrowIOError."""
    # A single context manager only: wrapping this in a second, outer
    # assertRaises would fail, because the inner one already consumes the error.
    with pytest.raises(pa.ArrowIOError):
        FeatherReader('test_invalid_file')

def _get_null_counts(self, path, columns=None):
Expand Down Expand Up @@ -98,7 +98,7 @@ def _assert_error_on_write(self, df, exc, path=None):
def f():
write_feather(df, path)

self.assertRaises(exc, f)
pytest.raises(exc, f)

def test_num_rows_attr(self):
df = pd.DataFrame({'foo': [1, 2, 3, 4, 5]})
Expand Down Expand Up @@ -466,3 +466,8 @@ def test_unsupported(self):
# non-strings
df = pd.DataFrame({'a': ['a', 1, 2.0]})
self._assert_error_on_write(df, ValueError)

@pytest.mark.slow
def test_large_dataframe(self):
    """Round-trip a one-column frame holding 400 million consecutive integers."""
    # NOTE(review): presumably sized to exceed what one read/write call can
    # transfer -- confirm the intent with the author
    n_rows = 400000000
    large_frame = pd.DataFrame({'A': np.arange(n_rows)})
    self._check_pandas_roundtrip(large_frame)