Skip to content

Reset DataFrame.RowCount to zero, when DataFrame is empty #6698

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jun 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 16 additions & 4 deletions src/Microsoft.Data.Analysis/DataFrameColumnCollection.cs
Original file line number Diff line number Diff line change
Expand Up @@ -58,21 +58,25 @@ public void Insert<T>(int columnIndex, IEnumerable<T> column, string columnName)
protected override void InsertItem(int columnIndex, DataFrameColumn column)
{
column = column ?? throw new ArgumentNullException(nameof(column));
if (RowCount > 0 && column.Length != RowCount)

if (Count == 0)
{
throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column));
//set RowCount when the first column is inserted into an empty dataframe
RowCount = column.Length;
}

if (Count >= 1 && RowCount == 0 && column.Length != RowCount)
else if (column.Length != RowCount)
{
//check that the new column has the same length (number of rows) as the dataframe
throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column));
}

if (_columnNameToIndexDictionary.ContainsKey(column.Name))
{
throw new ArgumentException(string.Format(Strings.DuplicateColumnName, column.Name), nameof(column));
}

RowCount = column.Length;

_columnNameToIndexDictionary[column.Name] = columnIndex;
for (int i = columnIndex + 1; i < Count; i++)
{
Expand Down Expand Up @@ -108,6 +112,11 @@ protected override void RemoveItem(int columnIndex)
_columnNameToIndexDictionary[this[i].Name]--;
}
base.RemoveItem(columnIndex);

//Reset RowCount if the last column was removed and dataframe is empty
if (Count == 0)
RowCount = 0;

ColumnsChanged?.Invoke();
}

Expand Down Expand Up @@ -138,6 +147,9 @@ protected override void ClearItems()
base.ClearItems();
ColumnsChanged?.Invoke();
_columnNameToIndexDictionary.Clear();

//reset RowCount as DataFrame is now empty
RowCount = 0;
}

/// <summary>
Expand Down
101 changes: 83 additions & 18 deletions test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -271,29 +271,44 @@ public void TestIndexer()
[Fact]
public void ColumnAndTableCreationTest()
{
DataFrameColumn intColumn = new Int32DataFrameColumn("IntColumn", Enumerable.Range(0, 10).Select(x => x));
DataFrameColumn floatColumn = new SingleDataFrameColumn("FloatColumn", Enumerable.Range(0, 10).Select(x => (float)x));
const int rowCount = 10;
DataFrameColumn intColumn = new Int32DataFrameColumn("IntColumn", Enumerable.Range(0, rowCount).Select(x => x));
DataFrameColumn floatColumn = new SingleDataFrameColumn("FloatColumn", Enumerable.Range(0, rowCount).Select(x => (float)x));
DataFrame dataFrame = new DataFrame();
dataFrame.Columns.Insert(0, intColumn);
dataFrame.Columns.Insert(1, floatColumn);
Assert.Equal(10, dataFrame.Rows.Count);
Assert.Equal(rowCount, dataFrame.Rows.Count);
Assert.Equal(2, dataFrame.Columns.Count);
Assert.Equal(10, dataFrame.Columns[0].Length);
Assert.Equal(2, dataFrame.Columns.LongCount());
Assert.Equal(rowCount, dataFrame.Columns[0].Length);
Assert.Equal("IntColumn", dataFrame.Columns[0].Name);
Assert.Equal(10, dataFrame.Columns[1].Length);
Assert.Equal(rowCount, dataFrame.Columns[1].Length);
Assert.Equal("FloatColumn", dataFrame.Columns[1].Name);

DataFrameColumn bigColumn = new SingleDataFrameColumn("BigColumn", Enumerable.Range(0, 11).Select(x => (float)x));
DataFrameColumn repeatedName = new SingleDataFrameColumn("FloatColumn", Enumerable.Range(0, 10).Select(x => (float)x));
//add column with bigger length than other columns in the dataframe
DataFrameColumn bigColumn = new SingleDataFrameColumn("BigColumn", Enumerable.Range(0, rowCount + 1).Select(x => (float)x));
Assert.Throws<ArgumentException>(() => dataFrame.Columns.Insert(2, bigColumn));
Assert.Throws<ArgumentException>(() => dataFrame.Columns.Add(bigColumn));

//add column smaller than other columns in the dataframe
DataFrameColumn smallColumn = new SingleDataFrameColumn("SmallColumn", Enumerable.Range(0, rowCount - 1).Select(x => (float)x));
Assert.Throws<ArgumentException>(() => dataFrame.Columns.Insert(2, smallColumn));
Assert.Throws<ArgumentException>(() => dataFrame.Columns.Add(smallColumn));

//add column with duplicate name
DataFrameColumn repeatedName = new SingleDataFrameColumn("FloatColumn", Enumerable.Range(0, rowCount).Select(x => (float)x));
Assert.Throws<ArgumentException>(() => dataFrame.Columns.Insert(2, repeatedName));
Assert.Throws<ArgumentOutOfRangeException>(() => dataFrame.Columns.Insert(10, repeatedName));

//Insert column at index out of range
DataFrameColumn extraColumn = new SingleDataFrameColumn("OtherFloatColumn", Enumerable.Range(0, rowCount).Select(x => (float)x));
var columnCount = dataFrame.Columns.Count;
Assert.Throws<ArgumentOutOfRangeException>(() => dataFrame.Columns.Insert(columnCount + 1, repeatedName));

Assert.Equal(2, dataFrame.Columns.Count);
DataFrameColumn intColumnCopy = new Int32DataFrameColumn("IntColumn", Enumerable.Range(0, 10).Select(x => x));
DataFrameColumn intColumnCopy = new Int32DataFrameColumn("IntColumn", Enumerable.Range(0, rowCount).Select(x => x));
Assert.Throws<ArgumentException>(() => dataFrame.Columns[1] = intColumnCopy);

DataFrameColumn differentIntColumn = new Int32DataFrameColumn("IntColumn1", Enumerable.Range(0, 10).Select(x => x));
DataFrameColumn differentIntColumn = new Int32DataFrameColumn("IntColumn1", Enumerable.Range(0, rowCount).Select(x => x));
dataFrame.Columns[1] = differentIntColumn;
Assert.True(object.ReferenceEquals(differentIntColumn, dataFrame.Columns[1]));

Expand All @@ -309,18 +324,68 @@ public void ColumnAndTableCreationTest()
}

[Fact]
public void InsertAndRemoveColumnTests()
public void InsertAndRemoveColumnToTheEndOfNotEmptyDataFrameTests()
{
DataFrame dataFrame = MakeDataFrameWithAllMutableColumnTypes(10);
DataFrameColumn intColumn = new Int32DataFrameColumn("IntColumn", Enumerable.Range(0, 10).Select(x => x));
DataFrameColumn charColumn = dataFrame.Columns["Char"];
int insertedIndex = dataFrame.Columns.Count;
dataFrame.Columns.Insert(dataFrame.Columns.Count, intColumn);
DataFrameColumn intColumn = new Int32DataFrameColumn("NewIntColumn", Enumerable.Range(0, 10).Select(x => x));

int columnCount = dataFrame.Columns.Count;
DataFrameColumn originalLastColumn = dataFrame.Columns[columnCount - 1];

//Insert new column at the end
dataFrame.Columns.Insert(columnCount, intColumn);
Assert.Equal(columnCount + 1, dataFrame.Columns.Count);

//Remove first
dataFrame.Columns.RemoveAt(0);
DataFrameColumn intColumn_1 = dataFrame.Columns["IntColumn"];
DataFrameColumn charColumn_1 = dataFrame.Columns["Char"];
Assert.Equal(columnCount, dataFrame.Columns.Count);

//Check that int column was inserted
DataFrameColumn intColumn_1 = dataFrame.Columns["NewIntColumn"];
Assert.True(ReferenceEquals(intColumn, intColumn_1));
Assert.True(ReferenceEquals(charColumn, charColumn_1));

//Check that last column of the original dataframe was not removed
DataFrameColumn lastColumn_1 = dataFrame.Columns[originalLastColumn.Name];
Assert.True(ReferenceEquals(originalLastColumn, lastColumn_1));

//Check that new column is the last one
int newIndex = dataFrame.Columns.IndexOf("NewIntColumn");
Assert.Equal(columnCount - 1, newIndex);

//Check that original last column now has correct index
int newIndexForOriginalLastColumn = dataFrame.Columns.IndexOf(originalLastColumn.Name);
Assert.Equal(columnCount - 2, newIndexForOriginalLastColumn);
}

[Fact]
public void AddAndRemoveColumnToTheEmptyDataFrameTests()
{
    // Number of values placed in the single column under test.
    const int expectedRowCount = 10;

    // Arrange: a DataFrame without any columns and one Int32 column to add.
    DataFrame emptyFrame = new DataFrame();
    DataFrameColumn newColumn = new Int32DataFrameColumn("NewIntColumn", Enumerable.Range(0, expectedRowCount).Select(x => x));

    // Adding the only column must make the row count follow the column length.
    emptyFrame.Columns.Add(newColumn);
    Assert.Single(emptyFrame.Columns);
    Assert.Equal(expectedRowCount, emptyFrame.Rows.Count);

    // Removing that column again must leave no columns and a row count of zero.
    emptyFrame.Columns.Remove(newColumn);
    Assert.Empty(emptyFrame.Columns);
    Assert.Equal(0, emptyFrame.Rows.Count);
}

[Fact]
public void ClearColumnsTests()
{
    // Arrange: build a DataFrame via the shared helper
    // (the argument 10 is presumably the row count; confirm against the helper).
    DataFrame populatedFrame = MakeDataFrameWithAllMutableColumnTypes(10);

    // Act: drop every column in one call.
    populatedFrame.Columns.Clear();

    // Assert: no columns are left and the row count has gone back to zero.
    Assert.Empty(populatedFrame.Columns);

    Assert.Equal(0, populatedFrame.Rows.Count);
    Assert.Equal(0, populatedFrame.Columns.LongCount());
}

[Fact]
Expand Down