Skip to content

Commit 274e384

Browse files
eerhardt authored and mstfbl committed
Fix perf regression in ShuffleRows (#5417)
RowShufflingTransformer was using ChannelReader incorrectly. It needs to block while waiting for items to read, and it was calling Thread.Sleep in order to wait without spinning the current core. This caused a major perf regression. The fix is to block synchronously in the correct way: call AsTask() on the ValueTask that is returned from the ChannelReader and block on the resulting Task. Fixes #5416
1 parent 05acb93 commit 274e384

File tree

1 file changed

+53
-0
lines changed

1 file changed

+53
-0
lines changed
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
// See the LICENSE file in the project root for more information.
4+
5+
using BenchmarkDotNet.Attributes;
6+
using Microsoft.ML.Benchmarks.Harness;
7+
using Microsoft.ML.Data;
8+
9+
namespace Microsoft.ML.Benchmarks
{
    /// <summary>
    /// Benchmark that loads an in-memory array of rows, shuffles it with
    /// <c>ShuffleRows</c>, and enumerates the "Sample" column of the result.
    /// </summary>
    [CIBenchmark]
    public class ShuffleRowsBench : BenchmarkBase
    {
        private MLContext _context;
        private TrainRow[] _rows;

        /// <summary>
        /// Builds the 10,000 input rows and the <see cref="MLContext"/> once,
        /// so that neither is created inside the measured method.
        /// </summary>
        [GlobalSetup]
        public void Setup()
        {
            var rows = new TrainRow[10_000];

            int index = 0;
            while (index < rows.Length)
            {
                var row = new TrainRow();
                row.Sample = index.ToString();
                row.Week = index;
                // Integer division happens first; the quotient is then stored as a float.
                row.Label = index / 2;

                rows[index] = row;
                index++;
            }

            _rows = rows;
            _context = new MLContext();
        }

        /// <summary>
        /// Wraps the prepared rows in a data view, shuffles it with a fixed seed of 0,
        /// and walks every value of the "Sample" column. The values are discarded.
        /// </summary>
        [Benchmark]
        public void ShuffleRows()
        {
            var source = _context.Data.LoadFromEnumerable(_rows);
            var shuffled = _context.Data.ShuffleRows(source, seed: 0);
            var samples = shuffled.GetColumn<string>("Sample");

            foreach (string unused in samples)
            {
                // Intentionally empty: only the enumeration itself is being measured.
            }
        }

        /// <summary>
        /// Row shape used as benchmark input; each field is mapped to a column of the same name.
        /// </summary>
        private class TrainRow
        {
            [ColumnName("Sample")] public string Sample;

            [ColumnName("Week")] public float Week;

            [ColumnName("Label")] public float Label;
        }
    }
}

0 commit comments

Comments
 (0)