Skip to content

Commit efab011

Browse files
authored
Reorganize dataframe files (#6872)
* Increase performance of elementwise comparison operations
* Fix Perf Test
* Reorganize files in the DataFrame related projects
* Fix merge issues
1 parent eb9af18 commit efab011

31 files changed

+2683
-2535
lines changed
Lines changed: 106 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,106 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
// See the LICENSE file in the project root for more information.
4+
5+
using System;
6+
using System.Collections.Generic;
7+
using System.Linq;
8+
using System.Text;
9+
using System.Threading.Tasks;
10+
using Apache.Arrow;
11+
using Xunit;
12+
13+
namespace Microsoft.Data.Analysis.Tests
14+
{
15+
public class ArrowStringColumnTests
16+
{
17+
18+
// Builds an ArrowStringDataFrameColumn directly from raw data/offset/validity
// buffers and checks that its length and both string values can be read back.
[Fact]
19+
public void TestBasicArrowStringColumn()
20+
{
21+
// Builder-made array; in this test it is only used for its Length and NullCount.
StringArray strArray = new StringArray.Builder().Append("foo").Append("bar").Build();
22+
// UTF-8/ASCII bytes of "foobar" ('f'=102, 'o'=111, 'b'=98, 'a'=97, 'r'=114).
Memory<byte> dataMemory = new byte[] { 102, 111, 111, 98, 97, 114 };
23+
// NOTE(review): every validity byte is zero, yet both values are asserted to be
// non-null below; presumably the column relies on the null count of 0 passed to
// the constructor - confirm against ArrowStringDataFrameColumn.
Memory<byte> nullMemory = new byte[] { 0, 0, 0, 0 };
24+
// Three 4-byte groups holding 0, 3 and 6 (presumably little-endian int32 offsets):
// "foo" spans data bytes [0, 3) and "bar" spans [3, 6).
Memory<byte> offsetMemory = new byte[] { 0, 0, 0, 0, 3, 0, 0, 0, 6, 0, 0, 0 };
25+
26+
ArrowStringDataFrameColumn stringColumn = new ArrowStringDataFrameColumn("String", dataMemory, offsetMemory, nullMemory, strArray.Length, strArray.NullCount);
27+
Assert.Equal(2, stringColumn.Length);
28+
Assert.Equal("foo", stringColumn[0]);
29+
Assert.Equal("bar", stringColumn[1]);
30+
}
31+
32+
// Builds a four-entry column containing one null and one empty string, then
// checks both the single-index accessor and the two-argument range accessor.
[Fact]
33+
public void TestArrowStringColumnWithNulls()
34+
{
35+
// "joe" and "mark" stored back to back in a single data buffer.
string data = "joemark";
36+
byte[] bytes = Encoding.UTF8.GetBytes(data);
37+
Memory<byte> dataMemory = new Memory<byte>(bytes);
38+
// Bits 0, 2 and 3 are set and bit 1 is clear; the assertions below expect
// only index 1 to be null.
Memory<byte> nullMemory = new byte[] { 0b1101 };
39+
// Five 4-byte groups holding 0, 3, 3, 7, 7: entry 0 covers bytes [0, 3) ("joe"),
// entry 1 is empty (asserted null), entry 2 covers [3, 7) ("mark"), and
// entry 3 is empty (asserted to be "").
Memory<byte> offsetMemory = new byte[] { 0, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0, 7, 0, 0, 0 };
40+
// The trailing 4 and 1 are presumably the length and the null count - confirm
// against the constructor signature.
ArrowStringDataFrameColumn stringColumn = new ArrowStringDataFrameColumn("String", dataMemory, offsetMemory, nullMemory, 4, 1);
41+
42+
Assert.Equal(4, stringColumn.Length);
43+
Assert.Equal("joe", stringColumn[0]);
44+
Assert.Null(stringColumn[1]);
45+
Assert.Equal("mark", stringColumn[2]);
46+
// An empty byte range with its validity bit set reads back as "", not null.
Assert.Equal("", stringColumn[3]);
47+
48+
// Two-argument indexer returning a list; presumably (start index, length) - confirm.
// The values are expected to match the single-index reads above.
List<string> ret = stringColumn[0, 4];
49+
Assert.Equal("joe", ret[0]);
50+
Assert.Null(ret[1]);
51+
Assert.Equal("mark", ret[2]);
52+
Assert.Equal("", ret[3]);
53+
}
54+
55+
// Clones a two-entry column while requesting five appended nulls, then checks
// that the original values are preserved and the extra entries are null.
[Fact]
56+
public void TestArrowStringColumnClone()
57+
{
58+
// Builder-made array; in this test it is only used for its Length and NullCount.
StringArray strArray = new StringArray.Builder().Append("foo").Append("bar").Build();
59+
// UTF-8/ASCII bytes of "foobar".
Memory<byte> dataMemory = new byte[] { 102, 111, 111, 98, 97, 114 };
60+
Memory<byte> nullMemory = new byte[] { 0, 0, 0, 0 };
61+
// Three 4-byte groups holding 0, 3 and 6, delimiting "foo" and "bar" in dataMemory.
Memory<byte> offsetMemory = new byte[] { 0, 0, 0, 0, 3, 0, 0, 0, 6, 0, 0, 0 };
62+
63+
ArrowStringDataFrameColumn stringColumn = new ArrowStringDataFrameColumn("String", dataMemory, offsetMemory, nullMemory, strArray.Length, strArray.NullCount);
64+
65+
DataFrameColumn clone = stringColumn.Clone(numberOfNullsToAppend: 5);
66+
// 2 original entries + 5 appended nulls.
Assert.Equal(7, clone.Length);
67+
Assert.Equal(stringColumn[0], clone[0]);
68+
Assert.Equal(stringColumn[1], clone[1]);
69+
// Indices 2..6 are the appended entries and must all be null.
for (int i = 2; i < 7; i++)
70+
Assert.Null(clone[i]);
71+
}
72+
73+
// Exercises ArrowStringDataFrameColumn.Apply with a mapper that appends "123"
// to non-null values, and with a mapper that always returns null.
[Fact]
74+
public void TestArrowStringApply()
75+
{
76+
// Helper defined in DataFrameTests (not visible here); the NullCount assertion
// below implies the resulting column contains exactly one null - confirm against the helper.
ArrowStringDataFrameColumn column = DataFrameTests.CreateArrowStringColumn(10);
77+
// Mapper: append "123" to non-null inputs, pass null through unchanged.
ArrowStringDataFrameColumn ret = column.Apply((string cur) =>
78+
{
79+
if (cur != null)
80+
{
81+
return cur + "123";
82+
}
83+
return null;
84+
});
85+
// Compare every entry of the result against the same transformation applied
// to the source column.
for (long i = 0; i < column.Length; i++)
86+
{
87+
if (column[i] != null)
88+
{
89+
Assert.Equal(column[i] + "123", ret[i]);
90+
}
91+
else
92+
{
93+
Assert.Null(ret[i]);
94+
}
95+
}
96+
// Nulls in the source stay null, so the result is expected to have one null.
Assert.Equal(1, ret.NullCount);
97+
98+
// Test null counts: a mapper that always returns null should make every entry null.
99+
ret = column.Apply((string cur) =>
100+
{
101+
return null;
102+
});
103+
Assert.Equal(column.Length, ret.NullCount);
104+
}
105+
}
106+
}

test/Microsoft.Data.Analysis.Tests/DataFrameJoinTests.cs renamed to test/Microsoft.Data.Analysis.Tests/DataFrameJoinExtensionsTests.cs

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -8,10 +8,10 @@
88

99
namespace Microsoft.Data.Analysis.Tests
1010
{
11-
public class DataFrameJoinTests
11+
public class DataFrameJoinExtensionsTests
1212
{
1313
[Fact]
14-
public void DataFrameJoinTests_GetSortedListsIntersection_EmptyCollections_EmptyResult()
14+
public void GetSortedListsIntersection_EmptyCollections_EmptyResult()
1515
{
1616
// Arrange
1717

@@ -28,7 +28,7 @@ public void DataFrameJoinTests_GetSortedListsIntersection_EmptyCollections_Empty
2828
}
2929

3030
[Fact]
31-
public void DataFrameJoinTests_GetSortedListsIntersection_EmptyCollections_FirstIsNotEmpty_EmptyResult()
31+
public void GetSortedListsIntersection_EmptyCollections_FirstIsNotEmpty_EmptyResult()
3232
{
3333
// Arrange
3434

@@ -51,7 +51,7 @@ public void DataFrameJoinTests_GetSortedListsIntersection_EmptyCollections_First
5151
}
5252

5353
[Fact]
54-
public void DataFrameJoinTests_GetSortedListsIntersection_EmptyCollections_SecondIsNotEmpty_EmptyResult()
54+
public void GetSortedListsIntersection_EmptyCollections_SecondIsNotEmpty_EmptyResult()
5555
{
5656
// Arrange
5757

@@ -74,7 +74,7 @@ public void DataFrameJoinTests_GetSortedListsIntersection_EmptyCollections_Secon
7474
}
7575

7676
[Fact]
77-
public void DataFrameJoinTests_GetSortedListsIntersection_SortedCollections_WithoutIntersection_Success()
77+
public void GetSortedListsIntersection_SortedCollections_WithoutIntersection_Success()
7878
{
7979
// Arrange
8080

@@ -105,7 +105,7 @@ public void DataFrameJoinTests_GetSortedListsIntersection_SortedCollections_With
105105
}
106106

107107
[Fact]
108-
public void DataFrameJoinTests_GetSortedListsIntersection_SortedCollections_WithIntersection_Success()
108+
public void GetSortedListsIntersection_SortedCollections_WithIntersection_Success()
109109
{
110110
// Arrange
111111

0 commit comments

Comments
 (0)