Skip to content

Commit 9ece0ff

Browse files
author
Aleksei Smirnov
authored
fix #5767 issue with DataFrame Merge method (#5768)
1 parent ff0c347 commit 9ece0ff

File tree

2 files changed

+27
-9
lines changed

2 files changed

+27
-9
lines changed

src/Microsoft.Data.Analysis/DataFrame.Join.cs

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -252,9 +252,9 @@ public DataFrame Merge<TKey>(DataFrame other, string leftJoinColumn, string righ
252252
// Hash the column with the smaller RowCount
253253
long leftRowCount = Rows.Count;
254254
long rightRowCount = other.Rows.Count;
255-
DataFrame longerDataFrame = leftRowCount <= rightRowCount ? other : this;
256-
DataFrame shorterDataFrame = ReferenceEquals(longerDataFrame, this) ? other : this;
257-
DataFrameColumn hashColumn = (leftRowCount <= rightRowCount) ? Columns[leftJoinColumn] : other.Columns[rightJoinColumn];
255+
256+
var leftColumnIsSmaller = (leftRowCount <= rightRowCount);
257+
DataFrameColumn hashColumn = leftColumnIsSmaller ? Columns[leftJoinColumn] : other.Columns[rightJoinColumn];
258258
DataFrameColumn otherColumn = ReferenceEquals(hashColumn, Columns[leftJoinColumn]) ? other.Columns[rightJoinColumn] : Columns[leftJoinColumn];
259259
Dictionary<TKey, ICollection<long>> multimap = hashColumn.GroupColumnValues<TKey>();
260260

@@ -270,23 +270,21 @@ public DataFrame Merge<TKey>(DataFrame other, string leftJoinColumn, string righ
270270
{
271271
if (hashColumn[row] == null)
272272
{
273-
leftRowIndices.Append(row);
274-
rightRowIndices.Append(i);
273+
leftRowIndices.Append(leftColumnIsSmaller ? row : i);
274+
rightRowIndices.Append(leftColumnIsSmaller ? i : row);
275275
}
276276
}
277277
else
278278
{
279279
if (hashColumn[row] != null)
280280
{
281-
leftRowIndices.Append(row);
282-
rightRowIndices.Append(i);
281+
leftRowIndices.Append(leftColumnIsSmaller ? row : i);
282+
rightRowIndices.Append(leftColumnIsSmaller ? i : row);
283283
}
284284
}
285285
}
286286
}
287287
}
288-
leftDataFrame = shorterDataFrame;
289-
rightDataFrame = longerDataFrame;
290288
}
291289
else if (joinAlgorithm == JoinAlgorithm.FullOuter)
292290
{
@@ -366,4 +364,5 @@ public DataFrame Merge<TKey>(DataFrame other, string leftJoinColumn, string righ
366364
}
367365

368366
}
367+
369368
}

test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1579,6 +1579,25 @@ public void TestSample()
15791579
Assert.Throws<ArgumentException>(()=> df.Sample(13));
15801580
}
15811581

1582+
/// <summary>
/// Checks that an inner <c>Merge</c> keeps each side's data in its own output column,
/// regardless of which input frame has fewer rows.
/// </summary>
/// <param name="leftCount">Row count requested for the left frame.</param>
/// <param name="rightCount">Row count requested for the right frame.</param>
// The two InlineData cases cover left-shorter (1, 2) and right-shorter (2, 1) inputs.
[Theory]
1583+
[InlineData(1, 2)]
1584+
[InlineData(2, 1)]
1585+
public void TestDataCorrectnessForInnerMerge(int leftCount, int rightCount)
1586+
{
1587+
// NOTE(review): MakeDataFrameWithNumericColumns is defined elsewhere; presumably it
// creates the "Int" column used as the join key below and `false` disables nulls — confirm.
DataFrame left = MakeDataFrameWithNumericColumns(leftCount, false);
1588+
// Tag every left row with the marker value "Left" in a column named "String".
DataFrameColumn leftStringColumn = new StringDataFrameColumn("String", Enumerable.Range(0, leftCount).Select(x => "Left"));
1589+
// Insert at index Columns.Count, i.e. after the existing columns.
left.Columns.Insert(left.Columns.Count, leftStringColumn);
1590+
1591+
DataFrame right = MakeDataFrameWithNumericColumns(rightCount, false);
1592+
// Same column name as on the left, but filled with the marker value "Right".
DataFrameColumn rightStringColumn = new StringDataFrameColumn("String", Enumerable.Range(0, rightCount).Select(x => "Right"));
1593+
right.Columns.Insert(right.Columns.Count, rightStringColumn);
1594+
1595+
// Inner join of the two frames on their "Int" columns.
DataFrame merge = left.Merge<int>(right, "Int", "Int", joinAlgorithm: JoinAlgorithm.Inner);
1596+
1597+
// The clashing "String" columns are read back as "String_left" / "String_right"
// (presumably Merge's default suffixes — confirm). Only the first merged row is checked.
Assert.Equal("Left", (string)merge.Columns["String_left"][0]);
1598+
Assert.Equal("Right", (string)merge.Columns["String_right"][0]);
1599+
}
1600+
15821601
[Fact]
15831602
public void TestMerge()
15841603
{

0 commit comments

Comments
 (0)