Skip to content

Commit

Permalink
Fix the behavior or column SetName method (#6676)
Browse files Browse the repository at this point in the history
* Fix the behavior or column SetName method

* Fix stack overflow exception

* Fix merge issues

---------

Co-authored-by: Michael Sharp <51342856+michaelgsharp@users.noreply.github.com>
  • Loading branch information
asmirnov82 and michaelgsharp authored Jul 6, 2023
1 parent 443ceb9 commit 53c0f26
Show file tree
Hide file tree
Showing 5 changed files with 96 additions and 13 deletions.
2 changes: 1 addition & 1 deletion src/Microsoft.Data.Analysis/DataFrame.Join.cs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ private void SetSuffixForDuplicatedColumnNames(DataFrame dataFrame, DataFrameCol
{
// Pre-existing column. Change name
DataFrameColumn existingColumn = dataFrame.Columns[index];
dataFrame._columnCollection.SetColumnName(existingColumn, existingColumn.Name + leftSuffix);
existingColumn.SetName(existingColumn.Name + leftSuffix);
column.SetName(column.Name + rightSuffix);
index = dataFrame._columnCollection.IndexOf(column.Name);
}
Expand Down
4 changes: 2 additions & 2 deletions src/Microsoft.Data.Analysis/DataFrame.cs
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ public DataFrame AddPrefix(string prefix, bool inPlace = false)
for (int i = 0; i < df.Columns.Count; i++)
{
DataFrameColumn column = df.Columns[i];
df._columnCollection.SetColumnName(column, prefix + column.Name);
column.SetName(prefix + column.Name);
df.OnColumnsChanged();
}
return df;
Expand All @@ -316,7 +316,7 @@ public DataFrame AddSuffix(string suffix, bool inPlace = false)
for (int i = 0; i < df.Columns.Count; i++)
{
DataFrameColumn column = df.Columns[i];
df._columnCollection.SetColumnName(column, column.Name + suffix);
column.SetName(column.Name + suffix);
df.OnColumnsChanged();
}
return df;
Expand Down
42 changes: 34 additions & 8 deletions src/Microsoft.Data.Analysis/DataFrameColumn.cs
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,26 @@ protected set
}
}

// List of ColumnCollections that owns the column
// Current API allows column to be added into multiple dataframes, that's why the list is needed
private readonly List<DataFrameColumnCollection> _ownerColumnCollections = new();

internal void AddOwner(DataFrameColumnCollection columCollection)
{
if (!_ownerColumnCollections.Contains(columCollection))
{
_ownerColumnCollections.Add(columCollection);
}
}

internal void RemoveOwner(DataFrameColumnCollection columCollection)
{
if (_ownerColumnCollections.Contains(columCollection))
{
_ownerColumnCollections.Remove(columCollection);
}
}

/// <summary>
/// The number of <see langword="null" /> values in this column.
/// </summary>
Expand All @@ -95,24 +115,30 @@ public abstract long NullCount
private string _name;

/// <summary>
/// The name of this column.
/// The column name.
/// </summary>
public string Name => _name;

/// <summary>
/// Updates the name of this column.
/// Updates the column name.
/// </summary>
/// <param name="newName">The new name.</param>
/// <param name="dataFrame">If passed in, update the column name in <see cref="DataFrame.Columns"/></param>
public void SetName(string newName, DataFrame dataFrame = null)
public void SetName(string newName)
{
if (!(dataFrame is null))
{
dataFrame.Columns.SetColumnName(this, newName);
}
foreach (var owner in _ownerColumnCollections)
owner.UpdateColumnNameMetadata(this, newName);

_name = newName;
}

/// <summary>
/// Updates the name of this column.
/// </summary>
/// <param name="newName">The new name.</param>
/// <param name="dataFrame">Ignored (for backward compatibility)</param>
[Obsolete]
public void SetName(string newName, DataFrame dataFrame) => SetName(newName);

/// <summary>
/// The type of data this column holds.
/// </summary>
Expand Down
23 changes: 21 additions & 2 deletions src/Microsoft.Data.Analysis/DataFrameColumnCollection.cs
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,23 @@ internal IReadOnlyList<string> GetColumnNames()
return ret;
}

public void RenameColumn(string currentName, string newName)
{
var column = this[currentName];
column.SetName(newName);
}

[Obsolete]
public void SetColumnName(DataFrameColumn column, string newName)
{
column.SetName(newName);
}

//Updates column's metadata (is used as a callback from Column class)
internal void UpdateColumnNameMetadata(DataFrameColumn column, string newName)
{
string currentName = column.Name;
int currentIndex = _columnNameToIndexDictionary[currentName];
column.SetName(newName);
_columnNameToIndexDictionary.Remove(currentName);
_columnNameToIndexDictionary.Add(newName, currentIndex);
ColumnsChanged?.Invoke();
Expand Down Expand Up @@ -75,6 +87,8 @@ protected override void InsertItem(int columnIndex, DataFrameColumn column)
throw new ArgumentException(string.Format(Strings.DuplicateColumnName, column.Name), nameof(column));
}

column.AddOwner(this);

RowCount = column.Length;

_columnNameToIndexDictionary[column.Name] = columnIndex;
Expand All @@ -98,9 +112,13 @@ protected override void SetItem(int columnIndex, DataFrameColumn column)
{
throw new ArgumentException(string.Format(Strings.DuplicateColumnName, column.Name), nameof(column));
}

_columnNameToIndexDictionary.Remove(this[columnIndex].Name);
_columnNameToIndexDictionary[column.Name] = columnIndex;

this[columnIndex].RemoveOwner(this);
base.SetItem(columnIndex, column);

ColumnsChanged?.Invoke();
}

Expand All @@ -111,6 +129,8 @@ protected override void RemoveItem(int columnIndex)
{
_columnNameToIndexDictionary[this[i].Name]--;
}

this[columnIndex].RemoveOwner(this);
base.RemoveItem(columnIndex);

//Reset RowCount if the last column was removed and dataframe is empty
Expand Down Expand Up @@ -474,6 +494,5 @@ public UInt16DataFrameColumn GetUInt16Column(string name)

throw new ArgumentException(string.Format(Strings.BadColumnCast, column.DataType, typeof(UInt16)));
}

}
}
38 changes: 38 additions & 0 deletions test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,44 @@ public void ClearColumnsTests()
Assert.Equal(0, dataFrame.Columns.LongCount());
}

[Fact]
public void RenameColumnWithSetNameTests()
{
StringDataFrameColumn city = new StringDataFrameColumn("City", new string[] { "London", "Berlin" });
PrimitiveDataFrameColumn<int> temp = new PrimitiveDataFrameColumn<int>("Temperature", new int[] { 12, 13 });

DataFrame dataframe = new DataFrame(city, temp);

// Change the name of the column:
dataframe["City"].SetName("Town");
var renamedColumn = dataframe["Town"];

Assert.Throws<ArgumentException>(() => dataframe["City"]);

Assert.NotNull(renamedColumn);
Assert.Equal("Town", renamedColumn.Name);
Assert.True(ReferenceEquals(city, renamedColumn));
}

[Fact]
public void RenameColumnWithRenameColumnTests()
{
StringDataFrameColumn city = new StringDataFrameColumn("City", new string[] { "London", "Berlin" });
PrimitiveDataFrameColumn<int> temp = new PrimitiveDataFrameColumn<int>("Temperature", new int[] { 12, 13 });

DataFrame dataframe = new DataFrame(city, temp);

// Change the name of the column:
dataframe.Columns.RenameColumn("City", "Town");
var renamedColumn = dataframe["Town"];

Assert.Throws<ArgumentException>(() => dataframe["City"]);

Assert.NotNull(renamedColumn);
Assert.Equal("Town", renamedColumn.Name);
Assert.True(ReferenceEquals(city, renamedColumn));
}

[Fact]
public void TestBinaryOperations()
{
Expand Down

0 comments on commit 53c0f26

Please sign in to comment.