Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions cpp/arrow/ArrowReaderProperties.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,4 +96,46 @@ extern "C"
{
TRYCATCH(properties->set_arrow_extensions_enabled(extensions_enabled);)
}

// Copies the hole_size_limit member of the reader's cache options into *value.
// NOTE(review): TRYCATCH is defined outside this view; it presumably turns any
// thrown C++ exception into the returned ExceptionInfo* - confirm.
PARQUETSHARP_EXPORT ExceptionInfo* ArrowReaderProperties_GetCacheOptions_HoleSizeLimit(const ArrowReaderProperties* properties, int64_t* value)
{
    TRYCATCH(*value = properties->cache_options().hole_size_limit;)
}

// Copies the range_size_limit member of the reader's cache options into *value.
// NOTE(review): TRYCATCH is defined outside this view; it presumably turns any
// thrown C++ exception into the returned ExceptionInfo* - confirm.
PARQUETSHARP_EXPORT ExceptionInfo* ArrowReaderProperties_GetCacheOptions_RangeSizeLimit(const ArrowReaderProperties* properties, int64_t* value)
{
    TRYCATCH(*value = properties->cache_options().range_size_limit;)
}

// Copies the lazy flag of the reader's cache options into *value.
// The output is a C++ bool, so the managed caller must treat it as a bool-sized value.
// NOTE(review): TRYCATCH is defined outside this view; it presumably turns any
// thrown C++ exception into the returned ExceptionInfo* - confirm.
PARQUETSHARP_EXPORT ExceptionInfo* ArrowReaderProperties_GetCacheOptions_Lazy(const ArrowReaderProperties* properties, bool* value)
{
    TRYCATCH(*value = properties->cache_options().lazy;)
}

// Copies the prefetch_limit member of the reader's cache options into *value.
// NOTE(review): TRYCATCH is defined outside this view; it presumably turns any
// thrown C++ exception into the returned ExceptionInfo* - confirm.
PARQUETSHARP_EXPORT ExceptionInfo* ArrowReaderProperties_GetCacheOptions_PrefetchLimit(const ArrowReaderProperties* properties, int64_t* value)
{
    TRYCATCH(*value = properties->cache_options().prefetch_limit;)
}

// Builds an ::arrow::io::CacheOptions from the four scalar arguments and installs it
// on the reader properties via set_cache_options.
// The struct is filled in field by field (rather than positionally) so the code does
// not depend on the member order of ::arrow::io::CacheOptions.
// NOTE(review): TRYCATCH is defined outside this view; it presumably turns any
// thrown C++ exception into the returned ExceptionInfo* - confirm.
PARQUETSHARP_EXPORT ExceptionInfo* ArrowReaderProperties_SetCacheOptions(ArrowReaderProperties* properties, int64_t hole_size_limit, int64_t range_size_limit, bool lazy, int64_t prefetch_limit)
{
    ::arrow::io::CacheOptions updated;
    updated.lazy = lazy;
    updated.prefetch_limit = prefetch_limit;
    updated.hole_size_limit = hole_size_limit;
    updated.range_size_limit = range_size_limit;

    TRYCATCH(properties->set_cache_options(updated);)
}
}
9 changes: 9 additions & 0 deletions csharp.test/Arrow/TestArrowReaderProperties.cs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ public void TestDefaultProperties()
Assert.That(properties.BinaryType, Is.EqualTo(Apache.Arrow.Types.ArrowTypeId.Binary));
Assert.That(properties.ListType, Is.EqualTo(Apache.Arrow.Types.ArrowTypeId.List));
Assert.That(properties.ArrowExtensionEnabled, Is.False);
Assert.That(properties.CacheOptions.hole_size_limit, Is.EqualTo(8192));
Assert.That(properties.CacheOptions.range_size_limit, Is.EqualTo(32 * 1024 * 1024));
Assert.That(properties.CacheOptions.lazy, Is.True);
Assert.That(properties.CacheOptions.prefetch_limit, Is.EqualTo(0));
}

[Test]
Expand All @@ -34,6 +38,7 @@ public void TestSetProperties()
properties.BinaryType = Apache.Arrow.Types.ArrowTypeId.LargeBinary;
properties.ListType = Apache.Arrow.Types.ArrowTypeId.LargeList;
properties.ArrowExtensionEnabled = true;
properties.CacheOptions = new CacheOptions(hole_size_limit: 1024, range_size_limit: 2048, lazy: false, prefetch_limit: 4096);

Assert.That(properties.UseThreads, Is.True);
Assert.That(properties.BatchSize, Is.EqualTo(789));
Expand All @@ -43,6 +48,10 @@ public void TestSetProperties()
Assert.That(properties.BinaryType, Is.EqualTo(Apache.Arrow.Types.ArrowTypeId.LargeBinary));
Assert.That(properties.ListType, Is.EqualTo(Apache.Arrow.Types.ArrowTypeId.LargeList));
Assert.That(properties.ArrowExtensionEnabled, Is.True);
Assert.That(properties.CacheOptions.hole_size_limit, Is.EqualTo(1024));
Assert.That(properties.CacheOptions.range_size_limit, Is.EqualTo(2048));
Assert.That(properties.CacheOptions.lazy, Is.False);
Assert.That(properties.CacheOptions.prefetch_limit, Is.EqualTo(4096));
}
}
}
46 changes: 46 additions & 0 deletions csharp/Arrow/ArrowReaderProperties.cs
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,37 @@ public bool ArrowExtensionEnabled
}
}

/// <summary>
/// Read-coalescing options used by the reader.
/// Adjust these to tune coalescing behaviour for the characteristics of
/// the underlying filesystem.
/// </summary>
public CacheOptions CacheOptions
{
    get
    {
        // Every field is fetched through its own native call; each returned status is
        // checked before the next call is made.
        var holeStatus = ArrowReaderProperties_GetCacheOptions_HoleSizeLimit(Handle.IntPtr, out var holeSize);
        ExceptionInfo.Check(holeStatus);

        var rangeStatus = ArrowReaderProperties_GetCacheOptions_RangeSizeLimit(Handle.IntPtr, out var rangeSize);
        ExceptionInfo.Check(rangeStatus);

        var lazyStatus = ArrowReaderProperties_GetCacheOptions_Lazy(Handle.IntPtr, out var isLazy);
        ExceptionInfo.Check(lazyStatus);

        var prefetchStatus = ArrowReaderProperties_GetCacheOptions_PrefetchLimit(Handle.IntPtr, out var prefetch);
        ExceptionInfo.Check(prefetchStatus);

        // Keep the handle reachable until all native calls above have completed.
        GC.KeepAlive(Handle);

        return new CacheOptions(
            hole_size_limit: holeSize,
            range_size_limit: rangeSize,
            lazy: isLazy,
            prefetch_limit: prefetch);
    }

    set
    {
        var status = ArrowReaderProperties_SetCacheOptions(
            Handle.IntPtr,
            value.hole_size_limit,
            value.range_size_limit,
            value.lazy,
            value.prefetch_limit);
        ExceptionInfo.Check(status);

        // Keep the handle reachable until the native call above has completed.
        GC.KeepAlive(Handle);
    }
}

/// <summary>
/// P/Invoke binding for the native <c>ArrowReaderProperties_GetDefault</c> export,
/// which yields an <see cref="IntPtr"/> through <paramref name="readerProperties"/>.
/// NOTE(review): the returned IntPtr is presumably an exception-info pointer meant for
/// ExceptionInfo.Check, like the cache-option bindings in this class - confirm against the native source.
/// </summary>
[DllImport(ParquetDll.Name)]
private static extern IntPtr ArrowReaderProperties_GetDefault(out IntPtr readerProperties);

Expand Down Expand Up @@ -225,6 +256,21 @@ public bool ArrowExtensionEnabled
/// <summary>
/// P/Invoke binding for the native <c>ArrowReaderProperties_SetArrowExtensionEnabled</c> export.
/// NOTE(review): <paramref name="extensionsEnabled"/> uses the default bool marshalling
/// (4-byte BOOL). If the native parameter is a C++ bool, consider
/// [MarshalAs(UnmanagedType.I1)] - confirm against the native signature.
/// </summary>
[DllImport(ParquetDll.Name)]
private static extern IntPtr ArrowReaderProperties_SetArrowExtensionEnabled(IntPtr readerProperties, bool extensionsEnabled);

/// <summary>
/// P/Invoke binding for the native export that writes the cache options'
/// <c>hole_size_limit</c> (a 64-bit integer) into <paramref name="holeSizeLimit"/>.
/// The returned pointer is passed to ExceptionInfo.Check by the caller.
/// </summary>
[DllImport(ParquetDll.Name)]
private static extern IntPtr ArrowReaderProperties_GetCacheOptions_HoleSizeLimit(IntPtr readerProperties, out long holeSizeLimit);

/// <summary>
/// P/Invoke binding for the native export that writes the cache options'
/// <c>range_size_limit</c> (a 64-bit integer) into <paramref name="rangeSizeLimit"/>.
/// The returned pointer is passed to ExceptionInfo.Check by the caller.
/// </summary>
[DllImport(ParquetDll.Name)]
private static extern IntPtr ArrowReaderProperties_GetCacheOptions_RangeSizeLimit(IntPtr readerProperties, out long rangeSizeLimit);

/// <summary>
/// P/Invoke binding for the native export that writes the cache options'
/// <c>lazy</c> flag into <paramref name="lazy"/>.
/// The native function stores the value through a C++ <c>bool*</c>, which is a single
/// byte with the MSVC, GCC and Clang ABIs, so the parameter is marshalled as
/// <see cref="UnmanagedType.I1"/> instead of the default four-byte BOOL; otherwise the
/// marshaller would interpret three bytes the native code never wrote.
/// The returned pointer is passed to ExceptionInfo.Check by the caller.
/// </summary>
[DllImport(ParquetDll.Name)]
private static extern IntPtr ArrowReaderProperties_GetCacheOptions_Lazy(IntPtr readerProperties, [MarshalAs(UnmanagedType.I1)] out bool lazy);

/// <summary>
/// P/Invoke binding for the native export that writes the cache options'
/// <c>prefetch_limit</c> (a 64-bit integer) into <paramref name="prefetchLimit"/>.
/// The returned pointer is passed to ExceptionInfo.Check by the caller.
/// </summary>
[DllImport(ParquetDll.Name)]
private static extern IntPtr ArrowReaderProperties_GetCacheOptions_PrefetchLimit(IntPtr readerProperties, out long prefetchLimit);

/// <summary>
/// P/Invoke binding for the native export that builds a cache-options value from the
/// four scalar arguments and installs it on the reader properties.
/// <paramref name="lazy"/> is received natively as a C++ <c>bool</c> (a single byte with
/// the MSVC, GCC and Clang ABIs), so it is marshalled as <see cref="UnmanagedType.I1"/>
/// rather than the default four-byte BOOL to match the native signature exactly.
/// The returned pointer is passed to ExceptionInfo.Check by the caller.
/// </summary>
[DllImport(ParquetDll.Name)]
private static extern IntPtr ArrowReaderProperties_SetCacheOptions(IntPtr readerProperties, long holeSizeLimit, long rangeSizeLimit, [MarshalAs(UnmanagedType.I1)] bool lazy, long prefetchLimit);

internal readonly ParquetHandle Handle;
}
}
45 changes: 45 additions & 0 deletions csharp/CacheOption.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
using System;
using System.Runtime.InteropServices;

namespace ParquetSharp
{
/// <summary>
/// Plain value type bundling the settings that control read coalescing:
/// how far apart and how large merged byte ranges may be, whether the cache is
/// lazy, and how many ranges may be prefetched.
/// </summary>
public struct CacheOptions
{
    /// <summary>
    /// Largest gap, in bytes, allowed between two consecutive ranges for them
    /// to still be merged; ranges further apart than this are kept separate.
    /// </summary>
    public long hole_size_limit;

    /// <summary>
    /// Upper bound, in bytes, on the size of a merged range; two consecutive
    /// ranges are left separate when merging them would exceed this size.
    /// </summary>
    public long range_size_limit;

    /// <summary>
    /// Whether the cache defers I/O until data is requested.
    /// lazy = false: all byte ranges are requested when PreBuffer or WillNeed is called.
    /// lazy = true, prefetch_limit = 0: merged byte ranges are requested only once the
    /// reader needs them.
    /// lazy = true, prefetch_limit = k: up to k merged byte ranges are prefetched ahead
    /// of the range currently being read.
    /// </summary>
    public bool lazy;

    /// <summary>
    /// Maximum number of ranges to prefetch. Only relevant for a lazy cache, where it
    /// allows further ranges to be read asynchronously after the target range.
    /// </summary>
    public long prefetch_limit;

    /// <summary>
    /// Creates a fully specified set of cache options.
    /// </summary>
    /// <param name="hole_size_limit">Value for <see cref="hole_size_limit"/>.</param>
    /// <param name="range_size_limit">Value for <see cref="range_size_limit"/>.</param>
    /// <param name="lazy">Value for <see cref="lazy"/>.</param>
    /// <param name="prefetch_limit">Value for <see cref="prefetch_limit"/>; defaults to 0.</param>
    public CacheOptions(long hole_size_limit, long range_size_limit, bool lazy, long prefetch_limit = 0)
    {
        this.prefetch_limit = prefetch_limit;
        this.lazy = lazy;
        this.range_size_limit = range_size_limit;
        this.hole_size_limit = hole_size_limit;
    }
}
}
9 changes: 9 additions & 0 deletions csharp/PublicAPI.Unshipped.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,15 @@ ParquetSharp.Arrow.ArrowReaderProperties.BinaryType.get -> Apache.Arrow.Types.Ar
ParquetSharp.Arrow.ArrowReaderProperties.BinaryType.set -> void
ParquetSharp.Arrow.ArrowReaderProperties.ListType.get -> Apache.Arrow.Types.ArrowTypeId
ParquetSharp.Arrow.ArrowReaderProperties.ListType.set -> void
ParquetSharp.Arrow.ArrowReaderProperties.CacheOptions.get -> ParquetSharp.CacheOptions
ParquetSharp.Arrow.ArrowReaderProperties.CacheOptions.set -> void
ParquetSharp.CacheOptions
ParquetSharp.CacheOptions.CacheOptions() -> void
ParquetSharp.CacheOptions.CacheOptions(long hole_size_limit, long range_size_limit, bool lazy, long prefetch_limit = 0) -> void
ParquetSharp.CacheOptions.hole_size_limit -> long
ParquetSharp.CacheOptions.range_size_limit -> long
ParquetSharp.CacheOptions.lazy -> bool
ParquetSharp.CacheOptions.prefetch_limit -> long
ParquetSharp.ReaderProperties.ThriftStringSizeLimit.get -> int
ParquetSharp.ReaderProperties.SetThriftStringSizeLimit(int size) -> void
ParquetSharp.ReaderProperties.ThriftContainerSizeLimit.get -> int
Expand Down
Loading