Skip to content

Commit

Permalink
fix #5767 issue with DataFrame Merge method (#5768)
Browse files Browse the repository at this point in the history
  • Loading branch information
Aleksei Smirnov committed Apr 29, 2021
1 parent ff0c347 commit 9ece0ff
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 9 deletions.
17 changes: 8 additions & 9 deletions src/Microsoft.Data.Analysis/DataFrame.Join.cs
Original file line number Diff line number Diff line change
Expand Up @@ -252,9 +252,9 @@ public DataFrame Merge<TKey>(DataFrame other, string leftJoinColumn, string righ
// Hash the column with the smaller RowCount
long leftRowCount = Rows.Count;
long rightRowCount = other.Rows.Count;
DataFrame longerDataFrame = leftRowCount <= rightRowCount ? other : this;
DataFrame shorterDataFrame = ReferenceEquals(longerDataFrame, this) ? other : this;
DataFrameColumn hashColumn = (leftRowCount <= rightRowCount) ? Columns[leftJoinColumn] : other.Columns[rightJoinColumn];

var leftColumnIsSmaller = (leftRowCount <= rightRowCount);
DataFrameColumn hashColumn = leftColumnIsSmaller ? Columns[leftJoinColumn] : other.Columns[rightJoinColumn];
DataFrameColumn otherColumn = ReferenceEquals(hashColumn, Columns[leftJoinColumn]) ? other.Columns[rightJoinColumn] : Columns[leftJoinColumn];
Dictionary<TKey, ICollection<long>> multimap = hashColumn.GroupColumnValues<TKey>();

Expand All @@ -270,23 +270,21 @@ public DataFrame Merge<TKey>(DataFrame other, string leftJoinColumn, string righ
{
if (hashColumn[row] == null)
{
leftRowIndices.Append(row);
rightRowIndices.Append(i);
leftRowIndices.Append(leftColumnIsSmaller ? row : i);
rightRowIndices.Append(leftColumnIsSmaller ? i : row);
}
}
else
{
if (hashColumn[row] != null)
{
leftRowIndices.Append(row);
rightRowIndices.Append(i);
leftRowIndices.Append(leftColumnIsSmaller ? row : i);
rightRowIndices.Append(leftColumnIsSmaller ? i : row);
}
}
}
}
}
leftDataFrame = shorterDataFrame;
rightDataFrame = longerDataFrame;
}
else if (joinAlgorithm == JoinAlgorithm.FullOuter)
{
Expand Down Expand Up @@ -366,4 +364,5 @@ public DataFrame Merge<TKey>(DataFrame other, string leftJoinColumn, string righ
}

}

}
19 changes: 19 additions & 0 deletions test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1579,6 +1579,25 @@ public void TestSample()
Assert.Throws<ArgumentException>(()=> df.Sample(13));
}

[Theory]
[InlineData(1, 2)]
[InlineData(2, 1)]
public void TestDataCorrectnessForInnerMerge(int leftCount, int rightCount)
{
    // Builds a numeric frame of the requested size and appends a "String"
    // column in which every row holds the same marker text, so the origin
    // of each value can be identified after the merge.
    DataFrame BuildTaggedFrame(int rowCount, string marker)
    {
        DataFrame frame = MakeDataFrameWithNumericColumns(rowCount, false);
        DataFrameColumn markerColumn = new StringDataFrameColumn("String", Enumerable.Repeat(marker, rowCount));
        frame.Columns.Insert(frame.Columns.Count, markerColumn);
        return frame;
    }

    // The inline data covers both orderings: left frame shorter than the
    // right one, and left frame longer than the right one.
    DataFrame left = BuildTaggedFrame(leftCount, "Left");
    DataFrame right = BuildTaggedFrame(rightCount, "Right");

    DataFrame merge = left.Merge<int>(right, "Int", "Int", joinAlgorithm: JoinAlgorithm.Inner);

    // Whichever frame is shorter, values taken from the left frame must
    // land in "String_left" and values from the right one in "String_right".
    string firstLeftValue = (string)merge.Columns["String_left"][0];
    string firstRightValue = (string)merge.Columns["String_right"][0];

    Assert.Equal("Left", firstLeftValue);
    Assert.Equal("Right", firstRightValue);
}

[Fact]
public void TestMerge()
{
Expand Down

0 comments on commit 9ece0ff

Please sign in to comment.