diff --git a/lib/src/dataframe.dart b/lib/src/dataframe.dart index f835125..028e0bd 100644 --- a/lib/src/dataframe.dart +++ b/lib/src/dataframe.dart @@ -5,6 +5,7 @@ import 'dart:io'; import 'package:collection/collection.dart'; import 'package:csv/csv.dart'; import 'package:jiffy/jiffy.dart'; +import 'package:koala/koala.dart'; import 'column.dart'; import 'utils/list_base.dart'; @@ -26,15 +27,13 @@ typedef DataMatrix = List; /// Row access is granted through regular indexing, as DataFrame extends the data matrix of shape (rows x columns). /// Columns may be accessed via dataframe('columnName'). class DataFrame extends ListBase { - final ElementPositionTrackingList _columnNames; + final ElementPositionTrackingList _trackedColumnNames; // ************ constructors **************** /// Build a dataframe from specified [columnNames] and [data]. /// The [data] is expected to be of the shape (rows x columns). - DataFrame(List columnNames, DataMatrix data) - : this._columnNames = ElementPositionTrackingList(columnNames), - super(data) { + factory DataFrame.fromNamesAndData(List columnNames, DataMatrix data){ if (data.isEmpty) { throw ArgumentError( 'Did not receive any data; Use DataFrame.empty() to create an empty DataFrame'); @@ -43,20 +42,25 @@ class DataFrame extends ListBase { throw ArgumentError('Number of column names = ${columnNames.length} does ' 'not match number of data column = ${data.first.length}'); } + return DataFrame._default(columnNames, data); } /// Builds a dataframe from a list of [rowMaps], e.g. /// [{'col1': 420, 'col2': 69}, /// {'col1': 666, 'col2': 1470}] DataFrame.fromRowMaps(List rowMaps) - : this._columnNames = - ElementPositionTrackingList(rowMaps.first.keys.toList()), - super(rowMaps.map((e) => e.values.toList()).toList()); + : this._default( + rowMaps.first.keys.toList(), + rowMaps.map((e) => e.values.toList()).toList() + ); /// Returns an empty dataframe. DataFrame.empty() - : this._columnNames = ElementPositionTrackingList([]), - super([]); + : this._default([], []); + + DataFrame._default(List columnNames, DataMatrix data) + : this._trackedColumnNames = ElementPositionTrackingList(columnNames), + super(data); /// Build a dataframe from csv data. /// @@ -141,7 +145,7 @@ class DataFrame extends ListBase { } // instantiate DataFrame - final df = DataFrame(columnNames, fields); + final df = DataFrame.fromNamesAndData(columnNames, fields); // skip columns if required if (skipColumns != null) { @@ -165,7 +169,7 @@ class DataFrame extends ListBase { // attempt to convert dates if required if (convertDates) { - for (final name in df._columnNames) { + for (final name in df._trackedColumnNames) { try { df.transformColumn( name, @@ -215,19 +219,20 @@ class DataFrame extends ListBase { // ************** column names *************** - List get columnNames => _columnNames; + List get columnNames => _trackedColumnNames.l; - int get nColumns => _columnNames.length; + int get nColumns => columnNames.length; /// Accesses column index in O(1) int columnIndex(String colName) { try { - return _columnNames.indexOf(colName); + return _trackedColumnNames.indexOf(colName); } catch (_) { throw ArgumentError("Column named '$colName' not present in DataFrame"); } } + /// Returns a list of [nRows, nColumns] List get shape => [length, nColumns]; // ************* data access ***************** @@ -238,32 +243,38 @@ class DataFrame extends ListBase { Column call(String colName, {int start = 0, int? end}) => Column(columnIterable(colName, start: start, end: end).toList()); + /// Returns an iterable over the records of a column sliced as per [start] + /// and [end]. Iterable columnIterable(String colName, {int start = 0, int? end}) => sublist(start, end).map((row) => row[columnIndex(colName)]).cast(); - DataFrame withColumns(List columnNames) => DataFrame._copied( - ElementPositionTrackingList(columnNames), - columnNames.map((e) => this(e)).transposed()); - /// Returns an iterable over the columns. - Iterable columns() => _columnNames.map((e) => this(e)); + Iterable columns() => _trackedColumnNames.map(call); /// Grab a (typed) record sitting at dataframe[rowIndex][colName]. T record(int rowIndex, String colName) => this[rowIndex][columnIndex(colName)] as T; - DataFrame rowsAt(Iterable indices) => - DataFrame._copied(_columnNames, indices.map((e) => this[e]).toList()); + /// Returns a [DataFrame] comprised of a subset of present columns + /// as specified by [columnNames] + DataFrame withColumns(List columnNames, {bool asView = true}) => + _viewOrCopy(columnNames, columnNames.map((e) => this(e)).transposed(), asView); + + DataFrame rowsAt(Iterable indices, {bool asView = true}) => + _viewOrCopy(_trackedColumnNames.l, indices.map((e) => this[e]).toList(), asView); - DataFrame rowsWhere(List mask) => - DataFrame._copied(_columnNames, applyMask(mask).toList()); + DataFrame rowsWhere(List mask, {bool asView = true}) => + _viewOrCopy(_trackedColumnNames.l, applyMask(mask).toList(), asView); + + DataFrame _viewOrCopy(List columnNames, DataMatrix data, bool asView) => + asView ? DataFrame._default(columnNames, data) : _copied(columnNames, data); // **************** manipulation ****************** /// Add a new column to the end of the dataframe. The [records] have to be of the same length /// as the dataframe. void addColumn(String name, RecordCol records) { - if (_columnNames.contains(name)) { + if (_trackedColumnNames.contains(name)) { throw ArgumentError('$name column does already exist'); } @@ -276,22 +287,21 @@ class DataFrame extends ListBase { 'Length of column records does not match the one of the data frame'); } - _columnNames.add(name); + _trackedColumnNames.add(name); } - /// Remove a column from the dataframe. + /// Remove a column from the dataframe and return it. RecordCol removeColumn(String name) { final index = columnIndex(name); - _columnNames.removeAt(index); + _trackedColumnNames.removeAt(index); return map((element) { element.removeAt(index); }).toList(); } /// Transform the values corresponding to [name] as per [transformElement] in-place. - void transformColumn( - String name, dynamic Function(dynamic element) transformElement) { - this(name).forEachIndexed((i, element) { + void transformColumn(String name, dynamic Function(dynamic element) transformElement) { + columnIterable(name).forEachIndexed((i, element) { this[i][columnIndex(name)] = transformElement(element); }); } @@ -299,37 +309,35 @@ class DataFrame extends ListBase { /// Add a new row represented by [rowMap] of the structure {columnName: record} /// to the end of the dataframe. void addRowFromMap(RecordRowMap rowMap) => - add([for (final name in _columnNames) rowMap[name]]); + add([for (final name in _trackedColumnNames) rowMap[name]]); // ************ map representations ************* /// Returns a list of {columnName: value} Map-representations for each row. List rowMaps() => - [for (final row in this) Map.fromIterables(_columnNames, row)]; + [for (final row in this) Map.fromIterables(_trackedColumnNames, row)]; /// Returns a {columnName: columnData} representation. Map columnMap() => Map.fromIterable( - _columnNames, + _trackedColumnNames, value: (name) => this(name), ); // ************ copying ************* - /// Returns a deep copy of the dataframe. - DataFrame copy() => DataFrame._copied(_columnNames, this); + /// Returns a copy of the instance. + DataFrame copy() => _copied(columnNames, this); - DataFrame._copied( - ElementPositionTrackingList columns, DataMatrix data) - : this._columnNames = columns.copy(), - super(ListListExtensions(data).copy()); + DataFrame _copied(List names, DataMatrix data) => + DataFrame._default(names.copy(), data.copy()); // ************** slicing **************** - /// Returns a new, row-sliced dataframe. - DataFrame sliced(int start, [int? end]) => - DataFrame._copied(_columnNames, sublist(start, end)); + /// Returns a new, row-sliced instance. + DataFrame sliced(int start, {int? end, bool asView = true}) => + _viewOrCopy(columnNames, sublist(start, end), asView); - /// Slice dataframe in-place. + /// Row-slice instance in-place. void slice(int start, [int? end]) { if (start != 0) removeRange(0, start); if (end != null) removeRange(end, length); @@ -350,8 +358,8 @@ class DataFrame extends ListBase { {bool ascending = true, bool nullsFirst = true, Comparator? compareRecords}) => - DataFrame._copied( - _columnNames, + _copied( + _trackedColumnNames, _sort(colName, inPlace: false, ascending: ascending, @@ -406,9 +414,9 @@ class DataFrame extends ListBase { // ************* Object overrides ****************** - /// Returns hashCode accounting for both the [_data] and [_columnNames] + /// Returns hashCode accounting for both the [_data] and [_trackedColumnNames] @override - int get hashCode => l.hashCode + _columnNames.hashCode; + int get hashCode => l.hashCode + _trackedColumnNames.hashCode; @override bool operator ==(Object other) => diff --git a/lib/src/utils/element_position_tracking_list.dart b/lib/src/utils/element_position_tracking_list.dart index 9446ef8..c9c413f 100644 --- a/lib/src/utils/element_position_tracking_list.dart +++ b/lib/src/utils/element_position_tracking_list.dart @@ -15,6 +15,10 @@ class ElementPositionTrackingList extends ListBase { // *************** overrides ******************* + @override + bool contains(Object? element) => + _object2Index.containsKey(element); + @override void add(T element) { super.add(element); diff --git a/test/src/dataframe_test.dart b/test/src/dataframe_test.dart index 4bf010a..ece4240 100644 --- a/test/src/dataframe_test.dart +++ b/test/src/dataframe_test.dart @@ -18,9 +18,9 @@ DataFrame _getDF() => DataFrame.fromRowMaps([ void main() { group('constructors', (){ test('default', () { - expect(() => DataFrame(['b'], []), throwsArgumentError); + expect(() => DataFrame.fromNamesAndData(['b'], []), throwsArgumentError); expect( - () => DataFrame([ + () => DataFrame.fromNamesAndData([ 'b' ], [ [888, 1] @@ -150,7 +150,7 @@ void main() { '2 | null 8 '); final df_with_longer_elements_than_column_names = - DataFrame([ + DataFrame.fromNamesAndData([ 'a', 'b' ], [ @@ -170,7 +170,7 @@ void main() { ..slice(0, 30); expect(df.length, 30); - final sliced = df.sliced(5, 25); + final sliced = df.sliced(5, end: 25); expect(sliced.length, 20); // Ensure disentanglement of copied properties @@ -213,7 +213,7 @@ void main() { }); test('data access', () { - final df = DataFrame( + final df = DataFrame.fromNamesAndData( ['a', 'b', 'c'], [ [43, 'omg', null], @@ -254,7 +254,7 @@ void main() { // .withColumns + .sliced expect( - df.withColumns(['a', 'b']).sliced(1, 3).toString(), + df.withColumns(['a', 'b']).sliced(1, end: 3).toString(), ' a b \n' '0 | 701 \n' '1 | -9 ubiquitous' @@ -342,7 +342,7 @@ void main() { group('.toCsv', () { test('default', () async { final outputCsvPath = outputFilePath('out.csv'); - final df = DataFrame([ + final df = DataFrame.fromNamesAndData([ 'a', 'b', 'c' @@ -358,7 +358,7 @@ void main() { test('with null', () async { final outputCsvPath = outputFilePath('out1.csv'); - final df = DataFrame([ + final df = DataFrame.fromNamesAndData([ 'a', 'b', 'c' @@ -382,7 +382,7 @@ void main() { test('with single quote including strings', () async { final outputCsvPath = outputFilePath('out3.csv'); - final df = DataFrame([ + final df = DataFrame.fromNamesAndData([ 'a', 'b', 'c' @@ -398,7 +398,7 @@ void main() { test('without header', () async { final outputCsvPath = outputFilePath('out4.csv'); - final df = DataFrame([ + final df = DataFrame.fromNamesAndData([ 'a', 'b', 'c'