Skip to content

Integrated 6733 Jakerad generic math #3

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Jul 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions src/Microsoft.Data.Analysis/DataFrameColumn.BinaryOperations.cs
Original file line number Diff line number Diff line change
Expand Up @@ -316,5 +316,20 @@ public virtual PrimitiveDataFrameColumn<bool> ElementwiseLessThan<T>(T value)
throw new NotImplementedException();
}

/// <summary>
/// Performs an element-wise "is null" check on each value in the column.
/// </summary>
/// <remarks>
/// This virtual implementation does no work of its own: it only throws.
/// </remarks>
/// <exception cref="NotImplementedException">Always thrown by this implementation.</exception>
public virtual PrimitiveDataFrameColumn<bool> ElementwiseIsNull() => throw new NotImplementedException();

/// <summary>
/// Performs an element-wise "is not null" check on each value in the column.
/// </summary>
/// <remarks>
/// This virtual implementation does no work of its own: it only throws.
/// </remarks>
/// <exception cref="NotImplementedException">Always thrown by this implementation.</exception>
public virtual PrimitiveDataFrameColumn<bool> ElementwiseIsNotNull() => throw new NotImplementedException();
}
}
57 changes: 39 additions & 18 deletions src/Microsoft.Data.Analysis/DateTimeComputation.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Reflection;
using System.Text;

namespace Microsoft.Data.Analysis
Expand Down Expand Up @@ -189,26 +191,35 @@ public void CumulativeSum(PrimitiveColumnContainer<DateTime> column, IEnumerable
throw new NotSupportedException();
}

/// <summary>
/// Scans every buffer of <paramref name="column"/> for the largest value whose validity bit is set.
/// </summary>
/// <param name="column">Container providing the value buffers and the matching null-bitmap buffers.</param>
/// <param name="ret">Receives the largest value found, or null when <c>hasMaxValue</c> was never set.</param>
// NOTE(review): two signatures (DateTime / DateTime?) and two forms of several statements
// appear stacked in this block; only one of each pair can be live code.
public void Max(PrimitiveColumnContainer<DateTime> column, out DateTime ret)
public void Max(PrimitiveColumnContainer<DateTime> column, out DateTime? ret)
{
ret = column.Buffers[0].ReadOnlySpan[0];
// Seed with the smallest representable DateTime; hasMaxValue records whether any value replaced it.
var maxDate = DateTime.MinValue;
bool hasMaxValue = false;

for (int b = 0; b < column.Buffers.Count; b++)
{
var buffer = column.Buffers[b];
var readOnlySpan = buffer.ReadOnlySpan;
var readOnlySpan = column.Buffers[b].ReadOnlySpan;
var bitmapSpan = column.NullBitMapBuffers[b].ReadOnlySpan;
for (int i = 0; i < readOnlySpan.Length; i++)
{
// A cleared validity bit marks a null entry - skip it.
if (!BitmapHelper.IsValid(bitmapSpan, i))
continue;

var val = readOnlySpan[i];

if (val > ret)
// NOTE(review): the comparison is strict against the DateTime.MinValue seed, so a valid
// value equal to DateTime.MinValue never sets hasMaxValue; a column whose only valid
// values are DateTime.MinValue therefore yields null - confirm this is intended.
if (val > maxDate)
{
ret = val;
maxDate = val;
hasMaxValue = true;
}
}
}

// No value passed the comparison above: report null instead of the seed.
ret = hasMaxValue ? maxDate : null;
}

public void Max(PrimitiveColumnContainer<DateTime> column, IEnumerable<long> rows, out DateTime ret)
public void Max(PrimitiveColumnContainer<DateTime> column, IEnumerable<long> rows, out DateTime? ret)
{
ret = default;
var readOnlySpan = column.Buffers[0].ReadOnlySpan;
Expand Down Expand Up @@ -237,26 +248,36 @@ public void Max(PrimitiveColumnContainer<DateTime> column, IEnumerable<long> row
}
}

/// <summary>
/// Scans every buffer of <paramref name="column"/> for the smallest value whose validity bit is set.
/// </summary>
/// <param name="column">Container providing the value buffers and the matching null-bitmap buffers.</param>
/// <param name="ret">Receives the smallest value found, or null when <c>hasMinValue</c> was never set.</param>
// NOTE(review): two signatures (DateTime / DateTime?) and two forms of several statements
// appear stacked in this block; only one of each pair can be live code.
public void Min(PrimitiveColumnContainer<DateTime> column, out DateTime ret)
public void Min(PrimitiveColumnContainer<DateTime> column, out DateTime? ret)
{
ret = column.Buffers[0].ReadOnlySpan[0];
// Seed with the largest representable DateTime; hasMinValue records whether any value replaced it.
var minDate = DateTime.MaxValue;
bool hasMinValue = false;

for (int b = 0; b < column.Buffers.Count; b++)
{
var buffer = column.Buffers[b];
var readOnlySpan = buffer.ReadOnlySpan;
var readOnlySpan = column.Buffers[b].ReadOnlySpan;
var bitmapSpan = column.NullBitMapBuffers[b].ReadOnlySpan;

for (int i = 0; i < readOnlySpan.Length; i++)
{
// A cleared validity bit marks a null entry - skip it.
if (!BitmapHelper.IsValid(bitmapSpan, i))
continue;

var val = readOnlySpan[i];

if (val < ret)
// NOTE(review): the comparison is strict against the DateTime.MaxValue seed, so a valid
// value equal to DateTime.MaxValue never sets hasMinValue; a column whose only valid
// values are DateTime.MaxValue therefore yields null - confirm this is intended.
if (val < minDate)
{
ret = val;
minDate = val;
hasMinValue = true;
}
}
}

// No value passed the comparison above: report null instead of the seed.
ret = hasMinValue ? minDate : null;
}

public void Min(PrimitiveColumnContainer<DateTime> column, IEnumerable<long> rows, out DateTime ret)
public void Min(PrimitiveColumnContainer<DateTime> column, IEnumerable<long> rows, out DateTime? ret)
{
ret = default;
var readOnlySpan = column.Buffers[0].ReadOnlySpan;
Expand Down Expand Up @@ -285,22 +306,22 @@ public void Min(PrimitiveColumnContainer<DateTime> column, IEnumerable<long> row
}
}

/// <summary>
/// Product over a DateTime container; this implementation unconditionally throws.
/// </summary>
/// <exception cref="NotSupportedException">Always thrown.</exception>
public void Product(PrimitiveColumnContainer<DateTime> column, out DateTime ret)
public void Product(PrimitiveColumnContainer<DateTime> column, out DateTime? ret)
{
throw new NotSupportedException();
}

/// <summary>
/// Product over selected rows of a DateTime container; this implementation unconditionally throws.
/// </summary>
/// <exception cref="NotSupportedException">Always thrown.</exception>
public void Product(PrimitiveColumnContainer<DateTime> column, IEnumerable<long> rows, out DateTime ret)
public void Product(PrimitiveColumnContainer<DateTime> column, IEnumerable<long> rows, out DateTime? ret)
{
throw new NotSupportedException();
}

/// <summary>
/// Sum over a DateTime container; this implementation unconditionally throws.
/// </summary>
/// <exception cref="NotSupportedException">Always thrown.</exception>
public void Sum(PrimitiveColumnContainer<DateTime> column, out DateTime ret)
public void Sum(PrimitiveColumnContainer<DateTime> column, out DateTime? ret)
{
throw new NotSupportedException();
}

/// <summary>
/// Sum over selected rows of a DateTime container; this implementation unconditionally throws.
/// </summary>
/// <exception cref="NotSupportedException">Always thrown.</exception>
public void Sum(PrimitiveColumnContainer<DateTime> column, IEnumerable<long> rows, out DateTime ret)
public void Sum(PrimitiveColumnContainer<DateTime> column, IEnumerable<long> rows, out DateTime? ret)
{
throw new NotSupportedException();
}
Expand Down
52 changes: 31 additions & 21 deletions src/Microsoft.Data.Analysis/NumberMathComputation.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@

using System;
using System.Collections.Generic;
using System.Linq;
using System.Runtime.CompilerServices;
using System.Runtime.Versioning;
using Microsoft.ML.Data;

namespace Microsoft.Data.Analysis
{
Expand Down Expand Up @@ -75,43 +76,43 @@ public void CumulativeSum(PrimitiveColumnContainer<T> column, IEnumerable<long>
CumulativeApply(column, Add, rows);
}

/// <summary>
/// Reduces the whole column with <c>T.Max</c> by delegating to <c>CalculateReduction</c>.
/// </summary>
/// <param name="column">Container whose values are reduced.</param>
/// <param name="ret">Receives the value returned by <c>CalculateReduction</c>.</param>
public void Max(PrimitiveColumnContainer<T> column, out T ret)
public void Max(PrimitiveColumnContainer<T> column, out T? ret)
{
ret = CalculateReduction(column, T.Max, column[0].Value);
ret = CalculateReduction(column, T.Max);
}

/// <summary>
/// Reduces the column with <c>T.Max</c>, forwarding <paramref name="rows"/> to <c>CalculateReduction</c>.
/// </summary>
/// <param name="column">Container whose values are reduced.</param>
/// <param name="rows">Row indices handed to the reduction (presumably the rows to include - confirm against the rows overload).</param>
/// <param name="ret">Receives the value returned by <c>CalculateReduction</c>.</param>
public void Max(PrimitiveColumnContainer<T> column, IEnumerable<long> rows, out T ret)
public void Max(PrimitiveColumnContainer<T> column, IEnumerable<long> rows, out T? ret)
{
ret = CalculateReduction(column, T.Max, rows);
}

/// <summary>
/// Reduces the whole column with <c>T.Min</c> by delegating to <c>CalculateReduction</c>.
/// </summary>
/// <param name="column">Container whose values are reduced.</param>
/// <param name="ret">Receives the value returned by <c>CalculateReduction</c>.</param>
public void Min(PrimitiveColumnContainer<T> column, out T ret)
public void Min(PrimitiveColumnContainer<T> column, out T? ret)
{
ret = CalculateReduction(column, T.Min, column[0].Value);
ret = CalculateReduction(column, T.Min);
}

/// <summary>
/// Reduces the column with <c>T.Min</c>, forwarding <paramref name="rows"/> to <c>CalculateReduction</c>.
/// </summary>
/// <param name="column">Container whose values are reduced.</param>
/// <param name="rows">Row indices handed to the reduction (presumably the rows to include - confirm against the rows overload).</param>
/// <param name="ret">Receives the value returned by <c>CalculateReduction</c>.</param>
public void Min(PrimitiveColumnContainer<T> column, IEnumerable<long> rows, out T ret)
public void Min(PrimitiveColumnContainer<T> column, IEnumerable<long> rows, out T? ret)
{

ret = CalculateReduction(column, T.Min, rows);
}

/// <summary>
/// Reduces the whole column with the <c>Multiply</c> function by delegating to <c>CalculateReduction</c>.
/// </summary>
/// <param name="column">Container whose values are reduced.</param>
/// <param name="ret">Receives the value returned by <c>CalculateReduction</c>.</param>
public void Product(PrimitiveColumnContainer<T> column, out T ret)
public void Product(PrimitiveColumnContainer<T> column, out T? ret)
{
ret = CalculateReduction(column, Multiply, T.One);
ret = CalculateReduction(column, Multiply);
}

/// <summary>
/// Reduces the column with the <c>Multiply</c> function, forwarding <paramref name="rows"/> to <c>CalculateReduction</c>.
/// </summary>
/// <param name="column">Container whose values are reduced.</param>
/// <param name="rows">Row indices handed to the reduction (presumably the rows to include - confirm against the rows overload).</param>
/// <param name="ret">Receives the value returned by <c>CalculateReduction</c>.</param>
public void Product(PrimitiveColumnContainer<T> column, IEnumerable<long> rows, out T ret)
public void Product(PrimitiveColumnContainer<T> column, IEnumerable<long> rows, out T? ret)
{
ret = CalculateReduction(column, Multiply, rows);
}

/// <summary>
/// Reduces the whole column with the <c>Add</c> function by delegating to <c>CalculateReduction</c>.
/// </summary>
/// <param name="column">Container whose values are reduced.</param>
/// <param name="ret">Receives the value returned by <c>CalculateReduction</c>.</param>
public void Sum(PrimitiveColumnContainer<T> column, out T ret)
public void Sum(PrimitiveColumnContainer<T> column, out T? ret)
{
ret = CalculateReduction(column, Add, T.Zero);
ret = CalculateReduction(column, Add);
}

/// <summary>
/// Reduces the column with the <c>Add</c> function, forwarding <paramref name="rows"/> to <c>CalculateReduction</c>.
/// </summary>
/// <param name="column">Container whose values are reduced.</param>
/// <param name="rows">Row indices handed to the reduction (presumably the rows to include - confirm against the rows overload).</param>
/// <param name="ret">Receives the value returned by <c>CalculateReduction</c>.</param>
public void Sum(PrimitiveColumnContainer<T> column, IEnumerable<long> rows, out T ret)
public void Sum(PrimitiveColumnContainer<T> column, IEnumerable<long> rows, out T? ret)
{
ret = CalculateReduction(column, Add, rows);
}
Expand Down Expand Up @@ -144,7 +145,7 @@ protected void Apply(PrimitiveColumnContainer<T> column, Func<T, T> func)
var bitmap = column.NullBitMapBuffers[b].ReadOnlySpan;
for (int i = 0; i < buffer.Length; i++)
{
if (column.IsValid(bitmap, i))
if (BitmapHelper.IsValid(bitmap, i))
{
buffer[i] = func(buffer[i]);
}
Expand All @@ -161,7 +162,7 @@ protected void CumulativeApply(PrimitiveColumnContainer<T> column, Func<T, T, T>
var bitmap = column.NullBitMapBuffers[b].ReadOnlySpan;
for (int i = 0; i < buffer.Length; i++)
{
if (column.IsValid(bitmap, i))
if (BitmapHelper.IsValid(bitmap, i))
{
ret = func(buffer[i], ret);
buffer[i] = ret;
Expand All @@ -170,19 +171,28 @@ protected void CumulativeApply(PrimitiveColumnContainer<T> column, Func<T, T, T>
}
}

protected T CalculateReduction(PrimitiveColumnContainer<T> column, Func<T, T, T> func, T startValue)
protected T? CalculateReduction(PrimitiveColumnContainer<T> column, Func<T, T, T> func)
{
var ret = startValue;
T? ret = null;
bool isInitialized = false;

for (int b = 0; b < column.Buffers.Count; b++)
{
var buffer = column.Buffers[b].ReadOnlySpan;
var bitMap = column.NullBitMapBuffers[b].ReadOnlySpan;
for (int i = 0; i < buffer.Length; i++)
{
if (column.IsValid(bitMap, i))
if (BitmapHelper.IsValid(bitMap, i))
{
ret = checked(func(ret, buffer[i]));
if (!isInitialized)
{
isInitialized = true;
ret = buffer[i];
}
else
{
ret = checked(func(ret.Value, buffer[i]));
}
}
}
}
Expand Down Expand Up @@ -213,7 +223,7 @@ protected void CumulativeApply(PrimitiveColumnContainer<T> column, Func<T, T, T>
}

row -= minRange;
if (column.IsValid(bitmap, (int)row))
if (BitmapHelper.IsValid(bitmap, (int)row))
{
if (!isInitialized)
{
Expand Down Expand Up @@ -253,7 +263,7 @@ protected T CalculateReduction(PrimitiveColumnContainer<T> column, Func<T, T, T>
}
row -= minRange;

if (column.IsValid(bitMap, (int)row))
if (BitmapHelper.IsValid(bitMap, (int)row))
{
if (!isInitialized)
{
Expand Down
41 changes: 22 additions & 19 deletions src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,24 @@

namespace Microsoft.Data.Analysis
{
/// <summary>
/// Stateless helpers for reading single bits out of a validity (null) bitmap.
/// </summary>
internal static class BitmapHelper
{
    /// <summary>
    /// Reads the bit that corresponds to <paramref name="index"/> from a bitmap span.
    /// Meant for callers that already hold the span, so no further buffer lookup is needed.
    /// </summary>
    /// <param name="bitMapBufferSpan">Bitmap bytes; the byte at position <c>index / 8</c> is consulted.</param>
    /// <param name="index">Element position whose bit is read.</param>
    /// <returns><c>true</c> when the selected bit is 1; otherwise <c>false</c>.</returns>
    public static bool IsValid(ReadOnlySpan<byte> bitMapBufferSpan, int index)
        => IsBitSet(bitMapBufferSpan[index / 8], index);

    /// <summary>
    /// Tests one bit of <paramref name="curBitMap"/>; the low three bits of
    /// <paramref name="index"/> choose which bit (0 = least significant).
    /// </summary>
    /// <param name="curBitMap">The bitmap byte to inspect.</param>
    /// <param name="index">Element position; only its low three bits are used.</param>
    /// <returns><c>true</c> when the selected bit is 1; otherwise <c>false</c>.</returns>
    public static bool IsBitSet(byte curBitMap, int index)
    {
        int mask = 1 << (index & 7);
        return (curBitMap & mask) != 0;
    }
}

/// <summary>
/// PrimitiveDataFrameColumnContainer is just a store for the column data. APIs that want to change the data must be defined in PrimitiveDataFrameColumn
/// PrimitiveColumnContainer is just a store for the column data. APIs that want to change the data must be defined in PrimitiveDataFrameColumn
/// </summary>
/// <typeparam name="T"></typeparam>
internal partial class PrimitiveColumnContainer<T> : IEnumerable<T?>
Expand Down Expand Up @@ -223,7 +239,7 @@ public void ApplyElementwise(Func<T?, long, T?> func)
for (int i = 0; i < mutableBuffer.Length; i++)
{
long curIndex = i + prevLength;
bool isValid = IsValid(mutableNullBitMapBuffer, i);
bool isValid = BitmapHelper.IsValid(mutableNullBitMapBuffer, i);
T? value = func(isValid ? mutableBuffer[i] : null, curIndex);
mutableBuffer[i] = value.GetValueOrDefault();
SetValidityBit(mutableNullBitMapBuffer, i, value != null);
Expand All @@ -246,22 +262,14 @@ public void Apply<TResult>(Func<T?, TResult?> func, PrimitiveColumnContainer<TRe

for (int i = 0; i < sourceBuffer.Length; i++)
{
bool isValid = IsValid(sourceNullBitMap, i);
bool isValid = BitmapHelper.IsValid(sourceNullBitMap, i);
TResult? value = func(isValid ? sourceBuffer[i] : null);
mutableResultBuffer[i] = value.GetValueOrDefault();
resultContainer.SetValidityBit(mutableResultNullBitMapBuffers, i, value != null);
}
}
}

// Faster to use when we already have a span since it avoids indexing
public bool IsValid(ReadOnlySpan<byte> bitMapBufferSpan, int index)
{
int nullBitMapSpanIndex = index / 8;
byte thisBitMap = bitMapBufferSpan[nullBitMapSpanIndex];
return IsBitSet(thisBitMap, index);
}

public bool IsValid(long index) => NullCount == 0 || GetValidityBit(index);

private byte SetBit(byte curBitMap, int index, bool value)
Expand Down Expand Up @@ -329,11 +337,6 @@ internal void SetValidityBit(long index, bool value)
SetValidityBit(bitMapBuffer.Span, (int)index, value);
}

private bool IsBitSet(byte curBitMap, int index)
{
return ((curBitMap >> (index & 7)) & 1) != 0;
}

private bool GetValidityBit(long index)
{
if ((uint)index >= Length)
Expand All @@ -350,7 +353,7 @@ private bool GetValidityBit(long index)
int bitMapBufferIndex = (int)((uint)index / 8);
Debug.Assert(bitMapBuffer.Length > bitMapBufferIndex);
byte curBitMap = bitMapBuffer[bitMapBufferIndex];
return IsBitSet(curBitMap, (int)index);
return BitmapHelper.IsBitSet(curBitMap, (int)index);
}

public long Length;
Expand Down Expand Up @@ -512,7 +515,7 @@ public PrimitiveColumnContainer<T> Clone<U>(PrimitiveColumnContainer<U> mapIndic
spanIndex = buffer.Length - 1 - i;

long mapRowIndex = mapIndicesIntSpan.IsEmpty ? mapIndicesLongSpan[spanIndex] : mapIndicesIntSpan[spanIndex];
bool mapRowIndexIsValid = mapIndices.IsValid(mapIndicesNullBitMapSpan, spanIndex);
bool mapRowIndexIsValid = BitmapHelper.IsValid(mapIndicesNullBitMapSpan, spanIndex);
if (mapRowIndexIsValid && (mapRowIndex < minRange || mapRowIndex >= maxRange))
{
int bufferIndex = (int)(mapRowIndex / maxCapacity);
Expand All @@ -527,7 +530,7 @@ public PrimitiveColumnContainer<T> Clone<U>(PrimitiveColumnContainer<U> mapIndic
{
mapRowIndex -= minRange;
value = thisSpan[(int)mapRowIndex];
isValid = IsValid(thisNullBitMapSpan, (int)mapRowIndex);
isValid = BitmapHelper.IsValid(thisNullBitMapSpan, (int)mapRowIndex);
}

retSpan[i] = isValid ? value : default;
Expand Down
Loading