Skip to content

Commit c259a33

Browse files
authored
Fix DataFrame Merge issue (#6677)
* Fix DataFrame Merge issue * Return commented test
1 parent a3a6d7b commit c259a33

17 files changed

+221
-3
lines changed

src/Microsoft.Data.Analysis/BooleanDataFrameColumn.cs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,5 +19,15 @@ public BooleanDataFrameColumn(string name, long length = 0) : base(name, length)
1919
public BooleanDataFrameColumn(string name, ReadOnlyMemory<byte> buffer, ReadOnlyMemory<byte> nullBitMap, int length = 0, int nullCount = 0) : base(name, buffer, nullBitMap, length, nullCount) { }
2020

2121
internal BooleanDataFrameColumn(string name, PrimitiveColumnContainer<bool> values) : base(name, values) { }
22+
23+
/// <summary>
/// Creates a new <see cref="BooleanDataFrameColumn"/> by forwarding <paramref name="name"/> and <paramref name="length"/> to its constructor.
/// </summary>
/// <param name="name">Name given to the new column.</param>
/// <param name="length">Length given to the new column; defaults to 0.</param>
/// <returns>The newly constructed <see cref="BooleanDataFrameColumn"/>.</returns>
protected override PrimitiveDataFrameColumn<bool> CreateNewColumn(string name, long length = 0)
    => new BooleanDataFrameColumn(name, length);
27+
28+
/// <summary>
/// Wraps <paramref name="container"/> in a new <see cref="BooleanDataFrameColumn"/> named <paramref name="name"/>.
/// </summary>
/// <param name="name">Name given to the new column.</param>
/// <param name="container">Container handed to the new column's constructor.</param>
/// <returns>The newly constructed <see cref="BooleanDataFrameColumn"/>.</returns>
internal override PrimitiveDataFrameColumn<bool> CreateNewColumn(string name, PrimitiveColumnContainer<bool> container)
    => new BooleanDataFrameColumn(name, container);
2232
}
2333
}

src/Microsoft.Data.Analysis/ByteDataFrameColumn.cs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,5 +19,15 @@ public ByteDataFrameColumn(string name, long length = 0) : base(name, length) {
1919
public ByteDataFrameColumn(string name, ReadOnlyMemory<byte> buffer, ReadOnlyMemory<byte> nullBitMap, int length = 0, int nullCount = 0) : base(name, buffer, nullBitMap, length, nullCount) { }
2020

2121
internal ByteDataFrameColumn(string name, PrimitiveColumnContainer<byte> values) : base(name, values) { }
22+
23+
/// <summary>
/// Creates a new <see cref="ByteDataFrameColumn"/> by forwarding <paramref name="name"/> and <paramref name="length"/> to its constructor.
/// </summary>
/// <param name="name">Name given to the new column.</param>
/// <param name="length">Length given to the new column; defaults to 0.</param>
/// <returns>The newly constructed <see cref="ByteDataFrameColumn"/>.</returns>
protected override PrimitiveDataFrameColumn<byte> CreateNewColumn(string name, long length = 0)
    => new ByteDataFrameColumn(name, length);
27+
28+
/// <summary>
/// Wraps <paramref name="container"/> in a new <see cref="ByteDataFrameColumn"/> named <paramref name="name"/>.
/// </summary>
/// <param name="name">Name given to the new column.</param>
/// <param name="container">Container handed to the new column's constructor.</param>
/// <returns>The newly constructed <see cref="ByteDataFrameColumn"/>.</returns>
internal override PrimitiveDataFrameColumn<byte> CreateNewColumn(string name, PrimitiveColumnContainer<byte> container)
    => new ByteDataFrameColumn(name, container);
2232
}
2333
}

src/Microsoft.Data.Analysis/CharDataFrameColumn.cs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,5 +19,15 @@ public CharDataFrameColumn(string name, long length = 0) : base(name, length) {
1919
public CharDataFrameColumn(string name, ReadOnlyMemory<byte> buffer, ReadOnlyMemory<byte> nullBitMap, int length = 0, int nullCount = 0) : base(name, buffer, nullBitMap, length, nullCount) { }
2020

2121
internal CharDataFrameColumn(string name, PrimitiveColumnContainer<char> values) : base(name, values) { }
22+
23+
/// <summary>
/// Creates a new <see cref="CharDataFrameColumn"/> by forwarding <paramref name="name"/> and <paramref name="length"/> to its constructor.
/// </summary>
/// <param name="name">Name given to the new column.</param>
/// <param name="length">Length given to the new column; defaults to 0.</param>
/// <returns>The newly constructed <see cref="CharDataFrameColumn"/>.</returns>
protected override PrimitiveDataFrameColumn<char> CreateNewColumn(string name, long length = 0)
    => new CharDataFrameColumn(name, length);
27+
28+
/// <summary>
/// Wraps <paramref name="container"/> in a new <see cref="CharDataFrameColumn"/> named <paramref name="name"/>.
/// </summary>
/// <param name="name">Name given to the new column.</param>
/// <param name="container">Container handed to the new column's constructor.</param>
/// <returns>The newly constructed <see cref="CharDataFrameColumn"/>.</returns>
internal override PrimitiveDataFrameColumn<char> CreateNewColumn(string name, PrimitiveColumnContainer<char> container)
    => new CharDataFrameColumn(name, container);
2232
}
2333
}

src/Microsoft.Data.Analysis/DataFrameColumnCollection.cs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,23 @@ public PrimitiveDataFrameColumn<T> GetPrimitiveColumn<T>(string name)
191191
throw new ArgumentException(string.Format(Strings.BadColumnCast, column.DataType, typeof(T)), nameof(T));
192192
}
193193

194+
/// <summary>
/// Gets the <see cref="DateTimeDataFrameColumn"/> with the specified <paramref name="name"/>.
/// </summary>
/// <param name="name">The name of the column.</param>
/// <returns>The column stored under <paramref name="name"/>, typed as <see cref="DateTimeDataFrameColumn"/>.</returns>
/// <exception cref="ArgumentException">A column named <paramref name="name"/> cannot be found, or the column's type doesn't match.</exception>
public DateTimeDataFrameColumn GetDateTimeColumn(string name)
{
    DataFrameColumn column = this[name];
    if (column is DateTimeDataFrameColumn ret)
    {
        return ret;
    }

    // Report which argument selected the mismatching column, in line with
    // GetPrimitiveColumn, which also supplies a parameter name to ArgumentException.
    throw new ArgumentException(string.Format(Strings.BadColumnCast, column.DataType, typeof(DateTime)), nameof(name));
}
210+
194211
/// <summary>
195212
/// Gets the <see cref="ArrowStringDataFrameColumn"/> with the specified <paramref name="name"/>.
196213
/// </summary>

src/Microsoft.Data.Analysis/DateTimeDataFrameColumn.cs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,5 +19,15 @@ public DateTimeDataFrameColumn(string name, long length = 0) : base(name, length
1919
public DateTimeDataFrameColumn(string name, ReadOnlyMemory<byte> buffer, ReadOnlyMemory<byte> nullBitMap, int length = 0, int nullCount = 0) : base(name, buffer, nullBitMap, length, nullCount) { }
2020

2121
internal DateTimeDataFrameColumn(string name, PrimitiveColumnContainer<DateTime> values) : base(name, values) { }
22+
23+
/// <summary>
/// Creates a new <see cref="DateTimeDataFrameColumn"/> by forwarding <paramref name="name"/> and <paramref name="length"/> to its constructor.
/// </summary>
/// <param name="name">Name given to the new column.</param>
/// <param name="length">Length given to the new column; defaults to 0.</param>
/// <returns>The newly constructed <see cref="DateTimeDataFrameColumn"/>.</returns>
protected override PrimitiveDataFrameColumn<DateTime> CreateNewColumn(string name, long length = 0)
    => new DateTimeDataFrameColumn(name, length);
27+
28+
/// <summary>
/// Wraps <paramref name="container"/> in a new <see cref="DateTimeDataFrameColumn"/> named <paramref name="name"/>.
/// </summary>
/// <param name="name">Name given to the new column.</param>
/// <param name="container">Container handed to the new column's constructor.</param>
/// <returns>The newly constructed <see cref="DateTimeDataFrameColumn"/>.</returns>
internal override PrimitiveDataFrameColumn<DateTime> CreateNewColumn(string name, PrimitiveColumnContainer<DateTime> container)
    => new DateTimeDataFrameColumn(name, container);
2232
}
2333
}

src/Microsoft.Data.Analysis/DecimalDataFrameColumn.cs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,5 +19,15 @@ public DecimalDataFrameColumn(string name, long length = 0) : base(name, length)
1919
public DecimalDataFrameColumn(string name, ReadOnlyMemory<byte> buffer, ReadOnlyMemory<byte> nullBitMap, int length = 0, int nullCount = 0) : base(name, buffer, nullBitMap, length, nullCount) { }
2020

2121
internal DecimalDataFrameColumn(string name, PrimitiveColumnContainer<decimal> values) : base(name, values) { }
22+
23+
/// <summary>
/// Creates a new <see cref="DecimalDataFrameColumn"/> by forwarding <paramref name="name"/> and <paramref name="length"/> to its constructor.
/// </summary>
/// <param name="name">Name given to the new column.</param>
/// <param name="length">Length given to the new column; defaults to 0.</param>
/// <returns>The newly constructed <see cref="DecimalDataFrameColumn"/>.</returns>
protected override PrimitiveDataFrameColumn<decimal> CreateNewColumn(string name, long length = 0)
    => new DecimalDataFrameColumn(name, length);
27+
28+
/// <summary>
/// Wraps <paramref name="container"/> in a new <see cref="DecimalDataFrameColumn"/> named <paramref name="name"/>.
/// </summary>
/// <param name="name">Name given to the new column.</param>
/// <param name="container">Container handed to the new column's constructor.</param>
/// <returns>The newly constructed <see cref="DecimalDataFrameColumn"/>.</returns>
internal override PrimitiveDataFrameColumn<decimal> CreateNewColumn(string name, PrimitiveColumnContainer<decimal> container)
    => new DecimalDataFrameColumn(name, container);
2232
}
2333
}

src/Microsoft.Data.Analysis/DoubleDataFrameColumn.cs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,5 +19,15 @@ public DoubleDataFrameColumn(string name, long length = 0) : base(name, length)
1919
public DoubleDataFrameColumn(string name, ReadOnlyMemory<byte> buffer, ReadOnlyMemory<byte> nullBitMap, int length = 0, int nullCount = 0) : base(name, buffer, nullBitMap, length, nullCount) { }
2020

2121
internal DoubleDataFrameColumn(string name, PrimitiveColumnContainer<double> values) : base(name, values) { }
22+
23+
/// <summary>
/// Creates a new <see cref="DoubleDataFrameColumn"/> by forwarding <paramref name="name"/> and <paramref name="length"/> to its constructor.
/// </summary>
/// <param name="name">Name given to the new column.</param>
/// <param name="length">Length given to the new column; defaults to 0.</param>
/// <returns>The newly constructed <see cref="DoubleDataFrameColumn"/>.</returns>
protected override PrimitiveDataFrameColumn<double> CreateNewColumn(string name, long length = 0)
    => new DoubleDataFrameColumn(name, length);
27+
28+
/// <summary>
/// Wraps <paramref name="container"/> in a new <see cref="DoubleDataFrameColumn"/> named <paramref name="name"/>.
/// </summary>
/// <param name="name">Name given to the new column.</param>
/// <param name="container">Container handed to the new column's constructor.</param>
/// <returns>The newly constructed <see cref="DoubleDataFrameColumn"/>.</returns>
internal override PrimitiveDataFrameColumn<double> CreateNewColumn(string name, PrimitiveColumnContainer<double> container)
    => new DoubleDataFrameColumn(name, container);
2232
}
2333
}

src/Microsoft.Data.Analysis/Int16DataFrameColumn.cs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,5 +19,15 @@ public Int16DataFrameColumn(string name, long length = 0) : base(name, length) {
1919
public Int16DataFrameColumn(string name, ReadOnlyMemory<byte> buffer, ReadOnlyMemory<byte> nullBitMap, int length = 0, int nullCount = 0) : base(name, buffer, nullBitMap, length, nullCount) { }
2020

2121
internal Int16DataFrameColumn(string name, PrimitiveColumnContainer<short> values) : base(name, values) { }
22+
23+
/// <summary>
/// Creates a new <see cref="Int16DataFrameColumn"/> by forwarding <paramref name="name"/> and <paramref name="length"/> to its constructor.
/// </summary>
/// <param name="name">Name given to the new column.</param>
/// <param name="length">Length given to the new column; defaults to 0.</param>
/// <returns>The newly constructed <see cref="Int16DataFrameColumn"/>.</returns>
protected override PrimitiveDataFrameColumn<short> CreateNewColumn(string name, long length = 0)
    => new Int16DataFrameColumn(name, length);
27+
28+
/// <summary>
/// Wraps <paramref name="container"/> in a new <see cref="Int16DataFrameColumn"/> named <paramref name="name"/>.
/// </summary>
/// <param name="name">Name given to the new column.</param>
/// <param name="container">Container handed to the new column's constructor.</param>
/// <returns>The newly constructed <see cref="Int16DataFrameColumn"/>.</returns>
internal override PrimitiveDataFrameColumn<short> CreateNewColumn(string name, PrimitiveColumnContainer<short> container)
    => new Int16DataFrameColumn(name, container);
2232
}
2333
}

src/Microsoft.Data.Analysis/Int32DataFrameColumn.cs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,5 +19,15 @@ public Int32DataFrameColumn(string name, long length = 0) : base(name, length) {
1919
public Int32DataFrameColumn(string name, ReadOnlyMemory<byte> buffer, ReadOnlyMemory<byte> nullBitMap, int length = 0, int nullCount = 0) : base(name, buffer, nullBitMap, length, nullCount) { }
2020

2121
internal Int32DataFrameColumn(string name, PrimitiveColumnContainer<int> values) : base(name, values) { }
22+
23+
/// <summary>
/// Creates a new <see cref="Int32DataFrameColumn"/> by forwarding <paramref name="name"/> and <paramref name="length"/> to its constructor.
/// </summary>
/// <param name="name">Name given to the new column.</param>
/// <param name="length">Length given to the new column; defaults to 0.</param>
/// <returns>The newly constructed <see cref="Int32DataFrameColumn"/>.</returns>
protected override PrimitiveDataFrameColumn<int> CreateNewColumn(string name, long length = 0)
    => new Int32DataFrameColumn(name, length);
27+
28+
/// <summary>
/// Wraps <paramref name="container"/> in a new <see cref="Int32DataFrameColumn"/> named <paramref name="name"/>.
/// </summary>
/// <param name="name">Name given to the new column.</param>
/// <param name="container">Container handed to the new column's constructor.</param>
/// <returns>The newly constructed <see cref="Int32DataFrameColumn"/>.</returns>
internal override PrimitiveDataFrameColumn<int> CreateNewColumn(string name, PrimitiveColumnContainer<int> container)
    => new Int32DataFrameColumn(name, container);
2232
}
2333
}

src/Microsoft.Data.Analysis/Int64DataFrameColumn.cs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,5 +19,15 @@ public Int64DataFrameColumn(string name, long length = 0) : base(name, length) {
1919
public Int64DataFrameColumn(string name, ReadOnlyMemory<byte> buffer, ReadOnlyMemory<byte> nullBitMap, int length = 0, int nullCount = 0) : base(name, buffer, nullBitMap, length, nullCount) { }
2020

2121
internal Int64DataFrameColumn(string name, PrimitiveColumnContainer<long> values) : base(name, values) { }
22+
23+
/// <summary>
/// Creates a new <see cref="Int64DataFrameColumn"/> by forwarding <paramref name="name"/> and <paramref name="length"/> to its constructor.
/// </summary>
/// <param name="name">Name given to the new column.</param>
/// <param name="length">Length given to the new column; defaults to 0.</param>
/// <returns>The newly constructed <see cref="Int64DataFrameColumn"/>.</returns>
protected override PrimitiveDataFrameColumn<long> CreateNewColumn(string name, long length = 0)
    => new Int64DataFrameColumn(name, length);
27+
28+
/// <summary>
/// Wraps <paramref name="container"/> in a new <see cref="Int64DataFrameColumn"/> named <paramref name="name"/>.
/// </summary>
/// <param name="name">Name given to the new column.</param>
/// <param name="container">Container handed to the new column's constructor.</param>
/// <returns>The newly constructed <see cref="Int64DataFrameColumn"/>.</returns>
internal override PrimitiveDataFrameColumn<long> CreateNewColumn(string name, PrimitiveColumnContainer<long> container)
    => new Int64DataFrameColumn(name, container);
2232
}
2333
}

src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.cs

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,16 @@ protected override IReadOnlyList<object> GetValues(long startIndex, int length)
188188
return ret;
189189
}
190190

191+
/// <summary>
/// Wraps <paramref name="container"/> in a new <see cref="PrimitiveDataFrameColumn{T}"/> named <paramref name="name"/>.
/// Virtual so that derived column types can return an instance of their own type instead.
/// </summary>
/// <param name="name">Name given to the new column.</param>
/// <param name="container">Container handed to the new column's constructor.</param>
/// <returns>The newly constructed column.</returns>
internal virtual PrimitiveDataFrameColumn<T> CreateNewColumn(string name, PrimitiveColumnContainer<T> container)
    => new PrimitiveDataFrameColumn<T>(name, container);
195+
196+
/// <summary>
/// Creates a new <see cref="PrimitiveDataFrameColumn{T}"/> by forwarding <paramref name="name"/> and <paramref name="length"/> to its constructor.
/// Virtual so that derived column types can return an instance of their own type instead.
/// </summary>
/// <param name="name">Name given to the new column.</param>
/// <param name="length">Length given to the new column; defaults to 0.</param>
/// <returns>The newly constructed column.</returns>
protected virtual PrimitiveDataFrameColumn<T> CreateNewColumn(string name, long length = 0)
    => new PrimitiveDataFrameColumn<T>(name, length);
200+
191201
internal T? GetTypedValue(long rowIndex) => _columnContainer[rowIndex];
192202

193203
protected override object GetValue(long rowIndex) => GetTypedValue(rowIndex);
@@ -379,7 +389,7 @@ private PrimitiveDataFrameColumn<T> Clone(PrimitiveDataFrameColumn<bool> boolCol
379389
{
380390
if (boolColumn.Length > Length)
381391
throw new ArgumentException(Strings.MapIndicesExceedsColumnLenth, nameof(boolColumn));
382-
PrimitiveDataFrameColumn<T> ret = new PrimitiveDataFrameColumn<T>(Name);
392+
PrimitiveDataFrameColumn<T> ret = CreateNewColumn(Name);
383393
for (long i = 0; i < boolColumn.Length; i++)
384394
{
385395
bool? value = boolColumn[i];
@@ -406,7 +416,8 @@ private PrimitiveDataFrameColumn<T> CloneImplementation<U>(PrimitiveDataFrameCol
406416
}
407417
else
408418
throw new NotImplementedException();
409-
PrimitiveDataFrameColumn<T> ret = new PrimitiveDataFrameColumn<T>(Name, retContainer);
419+
420+
PrimitiveDataFrameColumn<T> ret = CreateNewColumn(Name, retContainer);
410421
return ret;
411422
}
412423

@@ -415,7 +426,7 @@ public PrimitiveDataFrameColumn<T> Clone(PrimitiveDataFrameColumn<long> mapIndic
415426
if (mapIndices is null)
416427
{
417428
PrimitiveColumnContainer<T> newColumnContainer = _columnContainer.Clone();
418-
return new PrimitiveDataFrameColumn<T>(Name, newColumnContainer);
429+
return CreateNewColumn(Name, newColumnContainer);
419430
}
420431
else
421432
{

src/Microsoft.Data.Analysis/SByteDataFrameColumn.cs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,5 +19,15 @@ public SByteDataFrameColumn(string name, long length = 0) : base(name, length) {
1919
public SByteDataFrameColumn(string name, ReadOnlyMemory<byte> buffer, ReadOnlyMemory<byte> nullBitMap, int length = 0, int nullCount = 0) : base(name, buffer, nullBitMap, length, nullCount) { }
2020

2121
internal SByteDataFrameColumn(string name, PrimitiveColumnContainer<sbyte> values) : base(name, values) { }
22+
23+
/// <summary>
/// Creates a new <see cref="SByteDataFrameColumn"/> by forwarding <paramref name="name"/> and <paramref name="length"/> to its constructor.
/// </summary>
/// <param name="name">Name given to the new column.</param>
/// <param name="length">Length given to the new column; defaults to 0.</param>
/// <returns>The newly constructed <see cref="SByteDataFrameColumn"/>.</returns>
protected override PrimitiveDataFrameColumn<sbyte> CreateNewColumn(string name, long length = 0)
    => new SByteDataFrameColumn(name, length);
27+
28+
/// <summary>
/// Wraps <paramref name="container"/> in a new <see cref="SByteDataFrameColumn"/> named <paramref name="name"/>.
/// </summary>
/// <param name="name">Name given to the new column.</param>
/// <param name="container">Container handed to the new column's constructor.</param>
/// <returns>The newly constructed <see cref="SByteDataFrameColumn"/>.</returns>
internal override PrimitiveDataFrameColumn<sbyte> CreateNewColumn(string name, PrimitiveColumnContainer<sbyte> container)
    => new SByteDataFrameColumn(name, container);
2232
}
2333
}

src/Microsoft.Data.Analysis/SingleDataFrameColumn.cs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,5 +19,15 @@ public SingleDataFrameColumn(string name, long length = 0) : base(name, length)
1919
public SingleDataFrameColumn(string name, ReadOnlyMemory<byte> buffer, ReadOnlyMemory<byte> nullBitMap, int length = 0, int nullCount = 0) : base(name, buffer, nullBitMap, length, nullCount) { }
2020

2121
internal SingleDataFrameColumn(string name, PrimitiveColumnContainer<float> values) : base(name, values) { }
22+
23+
/// <summary>
/// Creates a new <see cref="SingleDataFrameColumn"/> by forwarding <paramref name="name"/> and <paramref name="length"/> to its constructor.
/// </summary>
/// <param name="name">Name given to the new column.</param>
/// <param name="length">Length given to the new column; defaults to 0.</param>
/// <returns>The newly constructed <see cref="SingleDataFrameColumn"/>.</returns>
protected override PrimitiveDataFrameColumn<float> CreateNewColumn(string name, long length = 0)
    => new SingleDataFrameColumn(name, length);
27+
28+
/// <summary>
/// Wraps <paramref name="container"/> in a new <see cref="SingleDataFrameColumn"/> named <paramref name="name"/>.
/// </summary>
/// <param name="name">Name given to the new column.</param>
/// <param name="container">Container handed to the new column's constructor.</param>
/// <returns>The newly constructed <see cref="SingleDataFrameColumn"/>.</returns>
internal override PrimitiveDataFrameColumn<float> CreateNewColumn(string name, PrimitiveColumnContainer<float> container)
    => new SingleDataFrameColumn(name, container);
2232
}
2333
}

src/Microsoft.Data.Analysis/UInt16DataFrameColumn.cs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,5 +19,15 @@ public UInt16DataFrameColumn(string name, long length = 0) : base(name, length)
1919
public UInt16DataFrameColumn(string name, ReadOnlyMemory<byte> buffer, ReadOnlyMemory<byte> nullBitMap, int length = 0, int nullCount = 0) : base(name, buffer, nullBitMap, length, nullCount) { }
2020

2121
internal UInt16DataFrameColumn(string name, PrimitiveColumnContainer<ushort> values) : base(name, values) { }
22+
23+
/// <summary>
/// Creates a new <see cref="UInt16DataFrameColumn"/> by forwarding <paramref name="name"/> and <paramref name="length"/> to its constructor.
/// </summary>
/// <param name="name">Name given to the new column.</param>
/// <param name="length">Length given to the new column; defaults to 0.</param>
/// <returns>The newly constructed <see cref="UInt16DataFrameColumn"/>.</returns>
protected override PrimitiveDataFrameColumn<ushort> CreateNewColumn(string name, long length = 0)
    => new UInt16DataFrameColumn(name, length);
27+
28+
/// <summary>
/// Wraps <paramref name="container"/> in a new <see cref="UInt16DataFrameColumn"/> named <paramref name="name"/>.
/// </summary>
/// <param name="name">Name given to the new column.</param>
/// <param name="container">Container handed to the new column's constructor.</param>
/// <returns>The newly constructed <see cref="UInt16DataFrameColumn"/>.</returns>
internal override PrimitiveDataFrameColumn<ushort> CreateNewColumn(string name, PrimitiveColumnContainer<ushort> container)
    => new UInt16DataFrameColumn(name, container);
2232
}
2333
}

src/Microsoft.Data.Analysis/UInt32DataFrameColumn.cs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,5 +19,15 @@ public UInt32DataFrameColumn(string name, long length = 0) : base(name, length)
1919
public UInt32DataFrameColumn(string name, ReadOnlyMemory<byte> buffer, ReadOnlyMemory<byte> nullBitMap, int length = 0, int nullCount = 0) : base(name, buffer, nullBitMap, length, nullCount) { }
2020

2121
internal UInt32DataFrameColumn(string name, PrimitiveColumnContainer<uint> values) : base(name, values) { }
22+
23+
/// <summary>
/// Creates a new <see cref="UInt32DataFrameColumn"/> by forwarding <paramref name="name"/> and <paramref name="length"/> to its constructor.
/// </summary>
/// <param name="name">Name given to the new column.</param>
/// <param name="length">Length given to the new column; defaults to 0.</param>
/// <returns>The newly constructed <see cref="UInt32DataFrameColumn"/>.</returns>
protected override PrimitiveDataFrameColumn<uint> CreateNewColumn(string name, long length = 0)
    => new UInt32DataFrameColumn(name, length);
27+
28+
/// <summary>
/// Wraps <paramref name="container"/> in a new <see cref="UInt32DataFrameColumn"/> named <paramref name="name"/>.
/// </summary>
/// <param name="name">Name given to the new column.</param>
/// <param name="container">Container handed to the new column's constructor.</param>
/// <returns>The newly constructed <see cref="UInt32DataFrameColumn"/>.</returns>
internal override PrimitiveDataFrameColumn<uint> CreateNewColumn(string name, PrimitiveColumnContainer<uint> container)
    => new UInt32DataFrameColumn(name, container);
2232
}
2333
}

src/Microsoft.Data.Analysis/UInt64DataFrameColumn.cs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,5 +19,15 @@ public UInt64DataFrameColumn(string name, long length = 0) : base(name, length)
1919
public UInt64DataFrameColumn(string name, ReadOnlyMemory<byte> buffer, ReadOnlyMemory<byte> nullBitMap, int length = 0, int nullCount = 0) : base(name, buffer, nullBitMap, length, nullCount) { }
2020

2121
internal UInt64DataFrameColumn(string name, PrimitiveColumnContainer<ulong> values) : base(name, values) { }
22+
23+
/// <summary>
/// Creates a new <see cref="UInt64DataFrameColumn"/> by forwarding <paramref name="name"/> and <paramref name="length"/> to its constructor.
/// </summary>
/// <param name="name">Name given to the new column.</param>
/// <param name="length">Length given to the new column; defaults to 0.</param>
/// <returns>The newly constructed <see cref="UInt64DataFrameColumn"/>.</returns>
protected override PrimitiveDataFrameColumn<ulong> CreateNewColumn(string name, long length = 0)
    => new UInt64DataFrameColumn(name, length);
27+
28+
/// <summary>
/// Wraps <paramref name="container"/> in a new <see cref="UInt64DataFrameColumn"/> named <paramref name="name"/>.
/// </summary>
/// <param name="name">Name given to the new column.</param>
/// <param name="container">Container handed to the new column's constructor.</param>
/// <returns>The newly constructed <see cref="UInt64DataFrameColumn"/>.</returns>
internal override PrimitiveDataFrameColumn<ulong> CreateNewColumn(string name, PrimitiveColumnContainer<ulong> container)
    => new UInt64DataFrameColumn(name, container);
2232
}
2333
}

test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2659,6 +2659,56 @@ public void TestMerge_Issue5778()
26592659
MatchRowsOnMergedDataFrame(merge, left, right, 1, 1, 0);
26602660
}
26612661

2662+
[Fact]
// Issue 6127: after merging on the "Int" column, every "_left"/"_right" column
// must be retrievable through the typed getter that matches its column type.
public void TestMerge_CorrectColumnTypes()
{
    DataFrame leftFrame = MakeDataFrameWithAllMutableColumnTypes(2, false);
    DataFrame rightFrame = MakeDataFrameWithAllMutableColumnTypes(1);

    DataFrame merged = leftFrame.Merge<int>(rightFrame, "Int", "Int");
    var columns = merged.Columns;

    // Boolean
    Assert.NotNull(columns.GetBooleanColumn("Bool_left"));
    Assert.NotNull(columns.GetBooleanColumn("Bool_right"));

    // Decimal and floating point
    Assert.NotNull(columns.GetDecimalColumn("Decimal_left"));
    Assert.NotNull(columns.GetDecimalColumn("Decimal_right"));
    Assert.NotNull(columns.GetSingleColumn("Float_left"));
    Assert.NotNull(columns.GetSingleColumn("Float_right"));
    Assert.NotNull(columns.GetDoubleColumn("Double_left"));
    Assert.NotNull(columns.GetDoubleColumn("Double_right"));

    // Byte and char
    Assert.NotNull(columns.GetByteColumn("Byte_left"));
    Assert.NotNull(columns.GetByteColumn("Byte_right"));
    Assert.NotNull(columns.GetCharColumn("Char_left"));
    Assert.NotNull(columns.GetCharColumn("Char_right"));

    // 16-bit integers
    Assert.NotNull(columns.GetInt16Column("Short_left"));
    Assert.NotNull(columns.GetInt16Column("Short_right"));
    Assert.NotNull(columns.GetUInt16Column("Ushort_left"));
    Assert.NotNull(columns.GetUInt16Column("Ushort_right"));

    // 32-bit integers
    Assert.NotNull(columns.GetInt32Column("Int_left"));
    Assert.NotNull(columns.GetInt32Column("Int_right"));
    Assert.NotNull(columns.GetUInt32Column("Uint_left"));
    Assert.NotNull(columns.GetUInt32Column("Uint_right"));

    // 64-bit integers
    Assert.NotNull(columns.GetInt64Column("Long_left"));
    Assert.NotNull(columns.GetInt64Column("Long_right"));
    Assert.NotNull(columns.GetUInt64Column("Ulong_left"));
    Assert.NotNull(columns.GetUInt64Column("Ulong_right"));

    // DateTime
    Assert.NotNull(columns.GetDateTimeColumn("DateTime_left"));
    Assert.NotNull(columns.GetDateTimeColumn("DateTime_right"));
}
2711+
26622712
[Fact]
26632713
public void TestDescription()
26642714
{

0 commit comments

Comments
 (0)