Skip to content

Commit 1d57c45

Browse files
authored
Merge pull request #3 from asmirnov82/jakerad_generic_math
Integrated 6733 Jakerad generic math
2 parents 373fc04 + 2856d3a commit 1d57c45

17 files changed

+405
-96
lines changed

src/Microsoft.Data.Analysis/DataFrameColumn.BinaryOperations.cs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -316,5 +316,20 @@ public virtual PrimitiveDataFrameColumn<bool> ElementwiseLessThan<T>(T value)
316316
throw new NotImplementedException();
317317
}
318318

319+
/// <summary>
320+
/// Performs an element-wise null check on each value in the column.
321+
/// </summary>
/// <returns>A boolean column; presumably true where the value is null (confirm against overrides).</returns>
/// <exception cref="NotImplementedException">Always thrown by this base implementation; the method is virtual so it can be overridden.</exception>
322+
public virtual PrimitiveDataFrameColumn<bool> ElementwiseIsNull()
323+
{
324+
throw new NotImplementedException();
325+
}
326+
327+
/// <summary>
328+
/// Performs an element-wise not-null check on each value in the column.
329+
/// </summary>
/// <returns>A boolean column; presumably true where the value is not null (confirm against overrides).</returns>
/// <exception cref="NotImplementedException">Always thrown by this base implementation; the method is virtual so it can be overridden.</exception>
330+
public virtual PrimitiveDataFrameColumn<bool> ElementwiseIsNotNull()
331+
{
332+
throw new NotImplementedException();
333+
}
319334
}
320335
}

src/Microsoft.Data.Analysis/DateTimeComputation.cs

Lines changed: 39 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44

55
using System;
66
using System.Collections.Generic;
7+
using System.Diagnostics;
8+
using System.Reflection;
79
using System.Text;
810

911
namespace Microsoft.Data.Analysis
@@ -189,26 +191,35 @@ public void CumulativeSum(PrimitiveColumnContainer<DateTime> column, IEnumerable
189191
throw new NotSupportedException();
190192
}
191193

192-
public void Max(PrimitiveColumnContainer<DateTime> column, out DateTime ret)
194+
// Scans every buffer of the column for the largest DateTime, skipping entries whose
// validity bit is not set, and writes the result to ret (null when hasMaxValue was never set).
// NOTE(review): hasMaxValue is only set when val > maxDate, and maxDate starts at
// DateTime.MinValue, so a column whose valid values all equal DateTime.MinValue also
// yields null - confirm whether that is intended.
public void Max(PrimitiveColumnContainer<DateTime> column, out DateTime? ret)
193195
{
194-
ret = column.Buffers[0].ReadOnlySpan[0];
196+
var maxDate = DateTime.MinValue;
197+
bool hasMaxValue = false;
198+
195199
for (int b = 0; b < column.Buffers.Count; b++)
196200
{
197-
var buffer = column.Buffers[b];
198-
var readOnlySpan = buffer.ReadOnlySpan;
201+
var readOnlySpan = column.Buffers[b].ReadOnlySpan;
202+
var bitmapSpan = column.NullBitMapBuffers[b].ReadOnlySpan;
199203
for (int i = 0; i < readOnlySpan.Length; i++)
200204
{
205+
// Skip this element when its validity bit is not set (the value is null)
206+
if (!BitmapHelper.IsValid(bitmapSpan, i))
207+
continue;
208+
201209
var val = readOnlySpan[i];
202210

203-
if (val > ret)
211+
if (val > maxDate)
204212
{
205-
ret = val;
213+
maxDate = val;
214+
hasMaxValue = true;
206215
}
207216
}
208217
}
218+
219+
// Report null when no valid element raised maxDate above its initial value
ret = hasMaxValue ? maxDate : null;
209220
}
210221

211-
public void Max(PrimitiveColumnContainer<DateTime> column, IEnumerable<long> rows, out DateTime ret)
222+
public void Max(PrimitiveColumnContainer<DateTime> column, IEnumerable<long> rows, out DateTime? ret)
212223
{
213224
ret = default;
214225
var readOnlySpan = column.Buffers[0].ReadOnlySpan;
@@ -237,26 +248,36 @@ public void Max(PrimitiveColumnContainer<DateTime> column, IEnumerable<long> row
237248
}
238249
}
239250

240-
public void Min(PrimitiveColumnContainer<DateTime> column, out DateTime ret)
251+
// Scans every buffer of the column for the smallest DateTime, skipping entries whose
// validity bit is not set, and writes the result to ret (null when hasMinValue was never set).
// NOTE(review): hasMinValue is only set when val < minDate, and minDate starts at
// DateTime.MaxValue, so a column whose valid values all equal DateTime.MaxValue also
// yields null - confirm whether that is intended.
public void Min(PrimitiveColumnContainer<DateTime> column, out DateTime? ret)
241252
{
242-
ret = column.Buffers[0].ReadOnlySpan[0];
253+
var minDate = DateTime.MaxValue;
254+
bool hasMinValue = false;
255+
243256
for (int b = 0; b < column.Buffers.Count; b++)
244257
{
245-
var buffer = column.Buffers[b];
246-
var readOnlySpan = buffer.ReadOnlySpan;
258+
var readOnlySpan = column.Buffers[b].ReadOnlySpan;
259+
var bitmapSpan = column.NullBitMapBuffers[b].ReadOnlySpan;
260+
247261
for (int i = 0; i < readOnlySpan.Length; i++)
248262
{
263+
// Skip this element when its validity bit is not set (the value is null)
264+
if (!BitmapHelper.IsValid(bitmapSpan, i))
265+
continue;
266+
249267
var val = readOnlySpan[i];
250268

251-
if (val < ret)
269+
if (val < minDate)
252270
{
253-
ret = val;
271+
minDate = val;
272+
hasMinValue = true;
254273
}
255274
}
256275
}
276+
277+
// Report null when no valid element lowered minDate below its initial value
ret = hasMinValue ? minDate : null;
257278
}
258279

259-
public void Min(PrimitiveColumnContainer<DateTime> column, IEnumerable<long> rows, out DateTime ret)
280+
public void Min(PrimitiveColumnContainer<DateTime> column, IEnumerable<long> rows, out DateTime? ret)
260281
{
261282
ret = default;
262283
var readOnlySpan = column.Buffers[0].ReadOnlySpan;
@@ -285,22 +306,22 @@ public void Min(PrimitiveColumnContainer<DateTime> column, IEnumerable<long> row
285306
}
286307
}
287308

288-
public void Product(PrimitiveColumnContainer<DateTime> column, out DateTime ret)
309+
// Product is not supported for DateTime columns: this always throws NotSupportedException and never assigns ret.
public void Product(PrimitiveColumnContainer<DateTime> column, out DateTime? ret)
289310
{
290311
throw new NotSupportedException();
291312
}
292313

293-
public void Product(PrimitiveColumnContainer<DateTime> column, IEnumerable<long> rows, out DateTime ret)
314+
// Product over selected rows is not supported for DateTime columns: this always throws NotSupportedException.
public void Product(PrimitiveColumnContainer<DateTime> column, IEnumerable<long> rows, out DateTime? ret)
294315
{
295316
throw new NotSupportedException();
296317
}
297318

298-
public void Sum(PrimitiveColumnContainer<DateTime> column, out DateTime ret)
319+
// Sum is not supported for DateTime columns: this always throws NotSupportedException and never assigns ret.
public void Sum(PrimitiveColumnContainer<DateTime> column, out DateTime? ret)
299320
{
300321
throw new NotSupportedException();
301322
}
302323

303-
public void Sum(PrimitiveColumnContainer<DateTime> column, IEnumerable<long> rows, out DateTime ret)
324+
// Sum over selected rows is not supported for DateTime columns: this always throws NotSupportedException.
public void Sum(PrimitiveColumnContainer<DateTime> column, IEnumerable<long> rows, out DateTime? ret)
304325
{
305326
throw new NotSupportedException();
306327
}

src/Microsoft.Data.Analysis/NumberMathComputation.cs

Lines changed: 31 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,9 @@
88

99
using System;
1010
using System.Collections.Generic;
11+
using System.Linq;
12+
using System.Runtime.CompilerServices;
1113
using System.Runtime.Versioning;
12-
using Microsoft.ML.Data;
1314

1415
namespace Microsoft.Data.Analysis
1516
{
@@ -75,43 +76,43 @@ public void CumulativeSum(PrimitiveColumnContainer<T> column, IEnumerable<long>
7576
CumulativeApply(column, Add, rows);
7677
}
7778

78-
public void Max(PrimitiveColumnContainer<T> column, out T ret)
79+
// Writes to ret the result of reducing the whole column with T.Max via CalculateReduction.
// The removed version seeded the reduction with column[0].Value; this one passes no start value and ret is nullable.
public void Max(PrimitiveColumnContainer<T> column, out T? ret)
7980
{
80-
ret = CalculateReduction(column, T.Max, column[0].Value);
81+
ret = CalculateReduction(column, T.Max);
8182
}
8283

83-
public void Max(PrimitiveColumnContainer<T> column, IEnumerable<long> rows, out T ret)
84+
// Writes to ret the result of the rows overload of CalculateReduction with T.Max; only the out type changed to nullable.
public void Max(PrimitiveColumnContainer<T> column, IEnumerable<long> rows, out T? ret)
8485
{
8586
ret = CalculateReduction(column, T.Max, rows);
8687
}
8788

88-
public void Min(PrimitiveColumnContainer<T> column, out T ret)
89+
// Writes to ret the result of reducing the whole column with T.Min via CalculateReduction.
// The removed version seeded the reduction with column[0].Value; this one passes no start value and ret is nullable.
public void Min(PrimitiveColumnContainer<T> column, out T? ret)
8990
{
90-
ret = CalculateReduction(column, T.Min, column[0].Value);
91+
ret = CalculateReduction(column, T.Min);
9192
}
9293

93-
public void Min(PrimitiveColumnContainer<T> column, IEnumerable<long> rows, out T ret)
94+
// Writes to ret the result of the rows overload of CalculateReduction with T.Min; only the out type changed to nullable.
public void Min(PrimitiveColumnContainer<T> column, IEnumerable<long> rows, out T? ret)
9495
{
9596

9697
ret = CalculateReduction(column, T.Min, rows);
9798
}
9899

99-
public void Product(PrimitiveColumnContainer<T> column, out T ret)
100+
// Writes to ret the result of reducing the whole column with Multiply via CalculateReduction.
// The removed version seeded the reduction with T.One; this one passes no start value and ret is nullable.
public void Product(PrimitiveColumnContainer<T> column, out T? ret)
100101
{
101-
ret = CalculateReduction(column, Multiply, T.One);
102+
ret = CalculateReduction(column, Multiply);
102103
}
103104

104-
public void Product(PrimitiveColumnContainer<T> column, IEnumerable<long> rows, out T ret)
105+
// Writes to ret the result of the rows overload of CalculateReduction with Multiply; only the out type changed to nullable.
public void Product(PrimitiveColumnContainer<T> column, IEnumerable<long> rows, out T? ret)
105106
{
106107
ret = CalculateReduction(column, Multiply, rows);
107108
}
108109

109-
public void Sum(PrimitiveColumnContainer<T> column, out T ret)
110+
// Writes to ret the result of reducing the whole column with Add via CalculateReduction.
// The removed version seeded the reduction with T.Zero; this one passes no start value and ret is nullable.
public void Sum(PrimitiveColumnContainer<T> column, out T? ret)
110111
{
111-
ret = CalculateReduction(column, Add, T.Zero);
112+
ret = CalculateReduction(column, Add);
112113
}
113114

114-
public void Sum(PrimitiveColumnContainer<T> column, IEnumerable<long> rows, out T ret)
115+
// Writes to ret the result of the rows overload of CalculateReduction with Add; only the out type changed to nullable.
public void Sum(PrimitiveColumnContainer<T> column, IEnumerable<long> rows, out T? ret)
115116
{
116117
ret = CalculateReduction(column, Add, rows);
117118
}
@@ -144,7 +145,7 @@ protected void Apply(PrimitiveColumnContainer<T> column, Func<T, T> func)
144145
var bitmap = column.NullBitMapBuffers[b].ReadOnlySpan;
145146
for (int i = 0; i < buffer.Length; i++)
146147
{
147-
if (column.IsValid(bitmap, i))
148+
if (BitmapHelper.IsValid(bitmap, i))
148149
{
149150
buffer[i] = func(buffer[i]);
150151
}
@@ -161,7 +162,7 @@ protected void CumulativeApply(PrimitiveColumnContainer<T> column, Func<T, T, T>
161162
var bitmap = column.NullBitMapBuffers[b].ReadOnlySpan;
162163
for (int i = 0; i < buffer.Length; i++)
163164
{
164-
if (column.IsValid(bitmap, i))
165+
if (BitmapHelper.IsValid(bitmap, i))
165166
{
166167
ret = func(buffer[i], ret);
167168
buffer[i] = ret;
@@ -170,19 +171,28 @@ protected void CumulativeApply(PrimitiveColumnContainer<T> column, Func<T, T, T>
170171
}
171172
}
172173

173-
protected T CalculateReduction(PrimitiveColumnContainer<T> column, Func<T, T, T> func, T startValue)
174+
protected T? CalculateReduction(PrimitiveColumnContainer<T> column, Func<T, T, T> func)
174175
{
175-
var ret = startValue;
176+
T? ret = null;
177+
bool isInitialized = false;
176178

177179
for (int b = 0; b < column.Buffers.Count; b++)
178180
{
179181
var buffer = column.Buffers[b].ReadOnlySpan;
180182
var bitMap = column.NullBitMapBuffers[b].ReadOnlySpan;
181183
for (int i = 0; i < buffer.Length; i++)
182184
{
183-
if (column.IsValid(bitMap, i))
185+
if (BitmapHelper.IsValid(bitMap, i))
184186
{
185-
ret = checked(func(ret, buffer[i]));
187+
if (!isInitialized)
188+
{
189+
isInitialized = true;
190+
ret = buffer[i];
191+
}
192+
else
193+
{
194+
ret = checked(func(ret.Value, buffer[i]));
195+
}
186196
}
187197
}
188198
}
@@ -213,7 +223,7 @@ protected void CumulativeApply(PrimitiveColumnContainer<T> column, Func<T, T, T>
213223
}
214224

215225
row -= minRange;
216-
if (column.IsValid(bitmap, (int)row))
226+
if (BitmapHelper.IsValid(bitmap, (int)row))
217227
{
218228
if (!isInitialized)
219229
{
@@ -253,7 +263,7 @@ protected T CalculateReduction(PrimitiveColumnContainer<T> column, Func<T, T, T>
253263
}
254264
row -= minRange;
255265

256-
if (column.IsValid(bitMap, (int)row))
266+
if (BitmapHelper.IsValid(bitMap, (int)row))
257267
{
258268
if (!isInitialized)
259269
{

src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs

Lines changed: 22 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,24 @@
1212

1313
namespace Microsoft.Data.Analysis
1414
{
15+
// Static helpers for reading individual bits out of a null (validity) bitmap.
internal static class BitmapHelper
16+
{
17+
// Faster to use when we already have a span since it avoids indexing.
// Reads the byte at position index / 8 in the span and tests the bit for index within that byte.
18+
public static bool IsValid(ReadOnlySpan<byte> bitMapBufferSpan, int index)
19+
{
20+
int nullBitMapSpanIndex = index / 8;
21+
byte thisBitMap = bitMapBufferSpan[nullBitMapSpanIndex];
22+
return IsBitSet(thisBitMap, index);
23+
}
24+
25+
// Returns true when bit (index & 7) of curBitMap, counted from the least significant bit, is 1.
public static bool IsBitSet(byte curBitMap, int index)
26+
{
27+
return ((curBitMap >> (index & 7)) & 1) != 0;
28+
}
29+
}
30+
1531
/// <summary>
16-
/// PrimitiveDataFrameColumnContainer is just a store for the column data. APIs that want to change the data must be defined in PrimitiveDataFrameColumn
32+
/// PrimitiveColumnContainer is just a store for the column data. APIs that want to change the data must be defined in PrimitiveDataFrameColumn
1733
/// </summary>
1834
/// <typeparam name="T"></typeparam>
1935
internal partial class PrimitiveColumnContainer<T> : IEnumerable<T?>
@@ -223,7 +239,7 @@ public void ApplyElementwise(Func<T?, long, T?> func)
223239
for (int i = 0; i < mutableBuffer.Length; i++)
224240
{
225241
long curIndex = i + prevLength;
226-
bool isValid = IsValid(mutableNullBitMapBuffer, i);
242+
bool isValid = BitmapHelper.IsValid(mutableNullBitMapBuffer, i);
227243
T? value = func(isValid ? mutableBuffer[i] : null, curIndex);
228244
mutableBuffer[i] = value.GetValueOrDefault();
229245
SetValidityBit(mutableNullBitMapBuffer, i, value != null);
@@ -246,22 +262,14 @@ public void Apply<TResult>(Func<T?, TResult?> func, PrimitiveColumnContainer<TRe
246262

247263
for (int i = 0; i < sourceBuffer.Length; i++)
248264
{
249-
bool isValid = IsValid(sourceNullBitMap, i);
265+
bool isValid = BitmapHelper.IsValid(sourceNullBitMap, i);
250266
TResult? value = func(isValid ? sourceBuffer[i] : null);
251267
mutableResultBuffer[i] = value.GetValueOrDefault();
252268
resultContainer.SetValidityBit(mutableResultNullBitMapBuffers, i, value != null);
253269
}
254270
}
255271
}
256272

257-
// Faster to use when we already have a span since it avoids indexing
258-
public bool IsValid(ReadOnlySpan<byte> bitMapBufferSpan, int index)
259-
{
260-
int nullBitMapSpanIndex = index / 8;
261-
byte thisBitMap = bitMapBufferSpan[nullBitMapSpanIndex];
262-
return IsBitSet(thisBitMap, index);
263-
}
264-
265273
public bool IsValid(long index) => NullCount == 0 || GetValidityBit(index);
266274

267275
private byte SetBit(byte curBitMap, int index, bool value)
@@ -329,11 +337,6 @@ internal void SetValidityBit(long index, bool value)
329337
SetValidityBit(bitMapBuffer.Span, (int)index, value);
330338
}
331339

332-
private bool IsBitSet(byte curBitMap, int index)
333-
{
334-
return ((curBitMap >> (index & 7)) & 1) != 0;
335-
}
336-
337340
private bool GetValidityBit(long index)
338341
{
339342
if ((uint)index >= Length)
@@ -350,7 +353,7 @@ private bool GetValidityBit(long index)
350353
int bitMapBufferIndex = (int)((uint)index / 8);
351354
Debug.Assert(bitMapBuffer.Length > bitMapBufferIndex);
352355
byte curBitMap = bitMapBuffer[bitMapBufferIndex];
353-
return IsBitSet(curBitMap, (int)index);
356+
return BitmapHelper.IsBitSet(curBitMap, (int)index);
354357
}
355358

356359
public long Length;
@@ -512,7 +515,7 @@ public PrimitiveColumnContainer<T> Clone<U>(PrimitiveColumnContainer<U> mapIndic
512515
spanIndex = buffer.Length - 1 - i;
513516

514517
long mapRowIndex = mapIndicesIntSpan.IsEmpty ? mapIndicesLongSpan[spanIndex] : mapIndicesIntSpan[spanIndex];
515-
bool mapRowIndexIsValid = mapIndices.IsValid(mapIndicesNullBitMapSpan, spanIndex);
518+
bool mapRowIndexIsValid = BitmapHelper.IsValid(mapIndicesNullBitMapSpan, spanIndex);
516519
if (mapRowIndexIsValid && (mapRowIndex < minRange || mapRowIndex >= maxRange))
517520
{
518521
int bufferIndex = (int)(mapRowIndex / maxCapacity);
@@ -527,7 +530,7 @@ public PrimitiveColumnContainer<T> Clone<U>(PrimitiveColumnContainer<U> mapIndic
527530
{
528531
mapRowIndex -= minRange;
529532
value = thisSpan[(int)mapRowIndex];
530-
isValid = IsValid(thisNullBitMapSpan, (int)mapRowIndex);
533+
isValid = BitmapHelper.IsValid(thisNullBitMapSpan, (int)mapRowIndex);
531534
}
532535

533536
retSpan[i] = isValid ? value : default;

0 commit comments

Comments
 (0)