Skip to content

Commit 2503375

Browse files
authored
Remove unnecessary allocation in ArrowStreamWriter (#73)
## What's Changed ArrowStreamWriter allocates 8k of memory by creating a new array pool. This change introduces a shared buffer instead of pool to reduce the allocations. The array pool is used to rent small arrays (8 bytes) but the pool allocates much bigger arrays (8kb in total) The Array pool has access time overhead for small arrays compared to direct allocation. Results from benchmarks: Old implementation: | Method | BatchLength | ColumnSetCount | Mean | Error | StdDev | Allocated | |----------- |------------ |--------------- |-----------:|----------:|----------:|----------:| | WriteBatch | 10000 | 10 | 6.118 ms | 0.1215 ms | 0.3345 ms | 248.53 KB | | WriteBatch | 10000 | 14 | 9.788 ms | 0.1910 ms | 0.3396 ms | 324.12 KB | | WriteBatch | 300000 | 10 | 119.351 ms | 3.1713 ms | 9.3008 ms | 248.53 KB | | WriteBatch | 300000 | 14 | 136.697 ms | 2.9229 ms | 8.4799 ms | 324.12 KB | New implementation: | Method | BatchLength | ColumnSetCount | Mean | Error | StdDev | Median | Allocated | |----------- |------------ |--------------- |-----------:|----------:|-----------:|-----------:|----------:| | WriteBatch | 10000 | 10 | 5.925 ms | 0.2057 ms | 0.6001 ms | 5.843 ms | 240.64 KB | | WriteBatch | 10000 | 14 | 8.908 ms | 0.2743 ms | 0.8002 ms | 8.778 ms | 316.23 KB | | WriteBatch | 300000 | 10 | 94.835 ms | 1.7872 ms | 3.7699 ms | 93.892 ms | 240.64 KB | | WriteBatch | 300000 | 14 | 147.995 ms | 3.6873 ms | 10.6975 ms | 144.591 ms | 316.23 KB | Closes #41.
1 parent fbbe78e commit 2503375

File tree

2 files changed

+5
-7
lines changed

2 files changed

+5
-7
lines changed

src/Apache.Arrow/Ipc/ArrowFileWriter.cs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
// limitations under the License.
1515

1616
using System;
17+
using System.Buffers;
1718
using System.Buffers.Binary;
1819
using System.Collections.Generic;
1920
using System.Diagnostics;
@@ -221,7 +222,7 @@ private void WriteFooter(Schema schema)
221222

222223
// Write footer length
223224

224-
using (Buffers.RentReturn(4, out Memory<byte> buffer))
225+
using (ArrayPool<byte>.Shared.RentReturn(4, out Memory<byte> buffer))
225226
{
226227
int footerLength;
227228
checked
@@ -292,7 +293,7 @@ private async Task WriteFooterAsync(Schema schema, CancellationToken cancellatio
292293

293294
cancellationToken.ThrowIfCancellationRequested();
294295

295-
using (Buffers.RentReturn(4, out Memory<byte> buffer))
296+
using (ArrayPool<byte>.Shared.RentReturn(4, out Memory<byte> buffer))
296297
{
297298
int footerLength;
298299
checked

src/Apache.Arrow/Ipc/ArrowStreamWriter.cs

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -611,8 +611,6 @@ public void Visit(IArrowArray array)
611611

612612
protected Stream BaseStream { get; }
613613

614-
protected ArrayPool<byte> Buffers { get; }
615-
616614
private protected FlatBufferBuilder Builder { get; }
617615

618616
protected bool HasWrittenSchema { get; set; }
@@ -663,7 +661,6 @@ public ArrowStreamWriter(Stream baseStream, Schema schema, bool leaveOpen, IpcOp
663661
_leaveOpen = leaveOpen;
664662
_allocator = allocator ?? MemoryAllocator.Default.Value;
665663

666-
Buffers = ArrayPool<byte>.Create();
667664
Builder = new FlatBufferBuilder(1024);
668665
HasWrittenSchema = false;
669666

@@ -1277,7 +1274,7 @@ private protected async ValueTask WriteFlatBufferAsync(CancellationToken cancell
12771274

12781275
private void WriteIpcMessageLength(int length)
12791276
{
1280-
using (Buffers.RentReturn(_options.SizeOfIpcLength, out Memory<byte> buffer))
1277+
using (ArrayPool<byte>.Shared.RentReturn(_options.SizeOfIpcLength, out Memory<byte> buffer))
12811278
{
12821279
Memory<byte> currentBufferPosition = buffer;
12831280
if (!_options.WriteLegacyIpcFormat)
@@ -1294,7 +1291,7 @@ private void WriteIpcMessageLength(int length)
12941291

12951292
private async ValueTask WriteIpcMessageLengthAsync(int length, CancellationToken cancellationToken)
12961293
{
1297-
using (Buffers.RentReturn(_options.SizeOfIpcLength, out Memory<byte> buffer))
1294+
using (ArrayPool<byte>.Shared.RentReturn(_options.SizeOfIpcLength, out Memory<byte> buffer))
12981295
{
12991296
Memory<byte> currentBufferPosition = buffer;
13001297
if (!_options.WriteLegacyIpcFormat)

0 commit comments

Comments
 (0)