Skip to content

Commit

Permalink
Reorganize dataframe files (#6872)
Browse files Browse the repository at this point in the history
* Increase performance of elementwise comparison operations

* Fix Perf Test

* Reorganize files in the DataFrame related projects

* Fix merge issues
  • Loading branch information
asmirnov82 authored Dec 12, 2023
1 parent eb9af18 commit efab011
Show file tree
Hide file tree
Showing 31 changed files with 2,683 additions and 2,535 deletions.
File renamed without changes.
File renamed without changes.
106 changes: 106 additions & 0 deletions test/Microsoft.Data.Analysis.Tests/ArrowStringColumnTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Apache.Arrow;
using Xunit;

namespace Microsoft.Data.Analysis.Tests
{
/// <summary>
/// Unit tests for <see cref="ArrowStringDataFrameColumn"/>: construction from raw
/// data/offset/validity buffers, element and range indexers, cloning with appended
/// nulls, and <c>Apply</c>.
/// </summary>
public class ArrowStringColumnTests
{
    /// <summary>
    /// Builds a two-element column from raw buffers and checks its length and both values.
    /// </summary>
    [Fact]
    public void TestBasicArrowStringColumn()
    {
        // Reference Arrow array; only its Length and NullCount are fed to the column constructor.
        StringArray reference = new StringArray.Builder().Append("foo").Append("bar").Build();

        // UTF-8 bytes of "foo" immediately followed by "bar".
        Memory<byte> valueBytes = new byte[] { 102, 111, 111, 98, 97, 114 };
        // NOTE(review): the validity buffer is all zeros while the reported null count is 0;
        // presumably the column ignores the bitmap when no nulls are reported - confirm.
        Memory<byte> validityBytes = new byte[] { 0, 0, 0, 0 };
        // Offsets 0, 3, 6 written as 4-byte little-endian integers.
        Memory<byte> offsetBytes = new byte[] { 0, 0, 0, 0, 3, 0, 0, 0, 6, 0, 0, 0 };

        var column = new ArrowStringDataFrameColumn("String", valueBytes, offsetBytes, validityBytes, reference.Length, reference.NullCount);

        Assert.Equal(2, column.Length);
        Assert.Equal("foo", column[0]);
        Assert.Equal("bar", column[1]);
    }

    /// <summary>
    /// Builds a four-element column containing one null and one empty string, then checks
    /// the values through both the single-element indexer and the range indexer.
    /// </summary>
    [Fact]
    public void TestArrowStringColumnWithNulls()
    {
        // Both indexers are expected to yield the same sequence: "joe", null, "mark", "".
        void AssertExpectedValues(Func<int, string> valueAt)
        {
            Assert.Equal("joe", valueAt(0));
            Assert.Null(valueAt(1));
            Assert.Equal("mark", valueAt(2));
            Assert.Equal("", valueAt(3));
        }

        Memory<byte> valueBytes = Encoding.UTF8.GetBytes("joemark");
        // Bit 1 is cleared, matching the row asserted to be null below.
        Memory<byte> validityBytes = new byte[] { 0b1101 };
        // Offsets 0, 3, 3, 7, 7 written as 4-byte little-endian integers.
        Memory<byte> offsetBytes = new byte[] { 0, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0, 7, 0, 0, 0 };
        var column = new ArrowStringDataFrameColumn("String", valueBytes, offsetBytes, validityBytes, 4, 1);

        Assert.Equal(4, column.Length);
        AssertExpectedValues(index => column[index]);

        List<string> range = column[0, 4];
        AssertExpectedValues(index => range[index]);
    }

    /// <summary>
    /// Clones a two-element column while appending five nulls and checks that the original
    /// values are kept and every appended slot reads back as null.
    /// </summary>
    [Fact]
    public void TestArrowStringColumnClone()
    {
        // Reference Arrow array; only its Length and NullCount are fed to the column constructor.
        StringArray reference = new StringArray.Builder().Append("foo").Append("bar").Build();

        // UTF-8 bytes of "foo" immediately followed by "bar".
        Memory<byte> valueBytes = new byte[] { 102, 111, 111, 98, 97, 114 };
        // NOTE(review): all-zero validity buffer paired with a null count of 0, as in the basic test - confirm intent.
        Memory<byte> validityBytes = new byte[] { 0, 0, 0, 0 };
        // Offsets 0, 3, 6 written as 4-byte little-endian integers.
        Memory<byte> offsetBytes = new byte[] { 0, 0, 0, 0, 3, 0, 0, 0, 6, 0, 0, 0 };

        var original = new ArrowStringDataFrameColumn("String", valueBytes, offsetBytes, validityBytes, reference.Length, reference.NullCount);

        DataFrameColumn clone = original.Clone(numberOfNullsToAppend: 5);

        Assert.Equal(7, clone.Length);
        Assert.Equal(original[0], clone[0]);
        Assert.Equal(original[1], clone[1]);

        // Rows 2..6 are the five appended entries.
        for (int appendedRow = 2; appendedRow < 7; appendedRow++)
        {
            Assert.Null(clone[appendedRow]);
        }
    }

    /// <summary>
    /// Checks that <c>Apply</c> maps each non-null value through the supplied function,
    /// leaves nulls as nulls, and reports the resulting null count.
    /// </summary>
    [Fact]
    public void TestArrowStringApply()
    {
        ArrowStringDataFrameColumn column = DataFrameTests.CreateArrowStringColumn(10);

        // Append "123" to every non-null value; nulls map to null.
        ArrowStringDataFrameColumn mapped = column.Apply((string value) => value == null ? null : value + "123");

        for (long row = 0; row < column.Length; row++)
        {
            string source = column[row];
            string result = mapped[row];
            if (source == null)
            {
                Assert.Null(result);
            }
            else
            {
                Assert.Equal(source + "123", result);
            }
        }

        // The mapping keeps nulls as nulls, so this presumably mirrors the single null
        // produced by the helper-created column - verify against CreateArrowStringColumn.
        Assert.Equal(1, mapped.NullCount);

        // Mapping every value to null must make the null count equal the column length.
        mapped = column.Apply((string unused) => null);
        Assert.Equal(column.Length, mapped.NullCount);
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@

namespace Microsoft.Data.Analysis.Tests
{
public class DataFrameJoinTests
public class DataFrameJoinExtensionsTests
{
[Fact]
public void DataFrameJoinTests_GetSortedListsIntersection_EmptyCollections_EmptyResult()
public void GetSortedListsIntersection_EmptyCollections_EmptyResult()
{
// Arrange

Expand All @@ -28,7 +28,7 @@ public void DataFrameJoinTests_GetSortedListsIntersection_EmptyCollections_Empty
}

[Fact]
public void DataFrameJoinTests_GetSortedListsIntersection_EmptyCollections_FirstIsNotEmpty_EmptyResult()
public void GetSortedListsIntersection_EmptyCollections_FirstIsNotEmpty_EmptyResult()
{
// Arrange

Expand All @@ -51,7 +51,7 @@ public void DataFrameJoinTests_GetSortedListsIntersection_EmptyCollections_First
}

[Fact]
public void DataFrameJoinTests_GetSortedListsIntersection_EmptyCollections_SecondIsNotEmpty_EmptyResult()
public void GetSortedListsIntersection_EmptyCollections_SecondIsNotEmpty_EmptyResult()
{
// Arrange

Expand All @@ -74,7 +74,7 @@ public void DataFrameJoinTests_GetSortedListsIntersection_EmptyCollections_Secon
}

[Fact]
public void DataFrameJoinTests_GetSortedListsIntersection_SortedCollections_WithoutIntersection_Success()
public void GetSortedListsIntersection_SortedCollections_WithoutIntersection_Success()
{
// Arrange

Expand Down Expand Up @@ -105,7 +105,7 @@ public void DataFrameJoinTests_GetSortedListsIntersection_SortedCollections_With
}

[Fact]
public void DataFrameJoinTests_GetSortedListsIntersection_SortedCollections_WithIntersection_Success()
public void GetSortedListsIntersection_SortedCollections_WithIntersection_Success()
{
// Arrange

Expand Down
Loading

0 comments on commit efab011

Please sign in to comment.