Skip to content

Commit

Permalink
Rework constructors; Add 'asView' parameter to DataFrame returning me…
Browse files Browse the repository at this point in the history
…thods
  • Loading branch information
w2sv committed Sep 15, 2022
1 parent 6871bd0 commit 44a79c3
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 57 deletions.
102 changes: 55 additions & 47 deletions lib/src/dataframe.dart
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import 'dart:io';
import 'package:collection/collection.dart';
import 'package:csv/csv.dart';
import 'package:jiffy/jiffy.dart';
import 'package:koala/koala.dart';

import 'column.dart';
import 'utils/list_base.dart';
Expand All @@ -26,15 +27,13 @@ typedef DataMatrix = List<RecordRow>;
/// Row access is granted through regular indexing, as DataFrame extends the data matrix of shape (rows x columns).
/// Columns may be accessed via dataframe('columnName').
class DataFrame extends ListBase<RecordRow> {
final ElementPositionTrackingList<String> _columnNames;
final ElementPositionTrackingList<String> _trackedColumnNames;

// ************ constructors ****************

/// Build a dataframe from specified [columnNames] and [data].
/// The [data] is expected to be of the shape (rows x columns).
DataFrame(List<String> columnNames, DataMatrix data)
: this._columnNames = ElementPositionTrackingList(columnNames),
super(data) {
factory DataFrame.fromNamesAndData(List<String> columnNames, DataMatrix data){
if (data.isEmpty) {
throw ArgumentError(
'Did not receive any data; Use DataFrame.empty() to create an empty DataFrame');
Expand All @@ -43,20 +42,25 @@ class DataFrame extends ListBase<RecordRow> {
throw ArgumentError('Number of column names = ${columnNames.length} does '
'not match number of data column = ${data.first.length}');
}
return DataFrame._default(columnNames, data);
}

/// Builds a dataframe from a list of [rowMaps], e.g.
/// [{'col1': 420, 'col2': 69},
/// {'col1': 666, 'col2': 1470}]
DataFrame.fromRowMaps(List<RecordRowMap> rowMaps)
: this._columnNames =
ElementPositionTrackingList(rowMaps.first.keys.toList()),
super(rowMaps.map((e) => e.values.toList()).toList());
: this._default(
rowMaps.first.keys.toList(),
rowMaps.map((e) => e.values.toList()).toList()
);

/// Returns an empty dataframe.
DataFrame.empty()
: this._columnNames = ElementPositionTrackingList([]),
super([]);
: this._default([], []);

/// Private generative constructor the other visible constructors delegate to.
///
/// Wraps [columnNames] in an [ElementPositionTrackingList] and forwards
/// [data] to the superclass. No validation is performed here; callers such
/// as [DataFrame.fromNamesAndData] check their input before delegating.
// NOTE(review): whether the passed lists are stored as-is or copied depends
// on ElementPositionTrackingList / ListBase, which are not visible here.
DataFrame._default(List<String> columnNames, DataMatrix data)
    : _trackedColumnNames = ElementPositionTrackingList(columnNames),
      super(data);

/// Build a dataframe from csv data.
///
Expand Down Expand Up @@ -141,7 +145,7 @@ class DataFrame extends ListBase<RecordRow> {
}

// instantiate DataFrame
final df = DataFrame(columnNames, fields);
final df = DataFrame.fromNamesAndData(columnNames, fields);

// skip columns if required
if (skipColumns != null) {
Expand All @@ -165,7 +169,7 @@ class DataFrame extends ListBase<RecordRow> {

// attempt to convert dates if required
if (convertDates) {
for (final name in df._columnNames) {
for (final name in df._trackedColumnNames) {
try {
df.transformColumn(
name,
Expand Down Expand Up @@ -215,19 +219,20 @@ class DataFrame extends ListBase<RecordRow> {

// ************** column names ***************

List<String> get columnNames => _columnNames;
/// The column names of this dataframe, as exposed by the `l` member of
/// [_trackedColumnNames].
// NOTE(review): presumably `l` is the wrapped list itself rather than a copy,
// so mutating the returned list may affect the dataframe — confirm.
List<String> get columnNames => _trackedColumnNames.l;

int get nColumns => _columnNames.length;
/// The number of columns, i.e. the length of [columnNames].
int get nColumns => columnNames.length;

/// Accesses column index in O(1)
int columnIndex(String colName) {
try {
return _columnNames.indexOf(colName);
return _trackedColumnNames.indexOf(colName);
} catch (_) {
throw ArgumentError("Column named '$colName' not present in DataFrame");
}
}

/// The dimensions of the dataframe as a two-element list:
/// the number of rows ([length]) followed by [nColumns].
List<int> get shape {
  return [length, nColumns];
}

// ************* data access *****************
Expand All @@ -238,32 +243,38 @@ class DataFrame extends ListBase<RecordRow> {
Column<T> call<T>(String colName, {int start = 0, int? end}) {
  // Materialize the (optionally row-sliced) column records into a list
  // before wrapping them in a Column.
  final records = columnIterable<T>(colName, start: start, end: end);
  return Column(records.toList());
}

/// Returns an iterable over the records of the column [colName], restricted
/// to the rows selected by `sublist(start, end)` and cast to [T].
///
/// The row range is taken immediately, while the per-row column lookup only
/// happens once the returned iterable is consumed.
Iterable<T> columnIterable<T>(String colName, {int start = 0, int? end}) {
  final rows = sublist(start, end);
  // The lookup deliberately stays inside the closure so that it remains lazy.
  return rows.map((row) => row[columnIndex(colName)]).cast<T>();
}

DataFrame withColumns(List<String> columnNames) => DataFrame._copied(
ElementPositionTrackingList(columnNames),
columnNames.map((e) => this(e)).transposed());

/// Returns an iterable over the columns.
Iterable<Column> columns() => _columnNames.map((e) => this(e));
Iterable<Column> columns() => _trackedColumnNames.map(call);

/// Returns the record stored in row [rowIndex] under the column [colName],
/// cast to [T].
T record<T>(int rowIndex, String colName) {
  final row = this[rowIndex];
  return row[columnIndex(colName)] as T;
}

DataFrame rowsAt(Iterable<int> indices) =>
DataFrame._copied(_columnNames, indices.map((e) => this[e]).toList());
/// Returns a [DataFrame] holding only the columns listed in [columnNames].
///
/// Every requested column is collected through [call] and the result of
/// `transposed()` on those columns is used as the new data. With [asView]
/// set, names and data are passed on as they are; otherwise they are routed
/// through [_copied].
DataFrame withColumns(List<String> columnNames, {bool asView = true}) {
  final DataMatrix rows = columnNames.map((name) => this(name)).transposed();
  return _viewOrCopy(columnNames, rows, asView);
}

/// Returns a [DataFrame] made up of the rows found at [indices], in the
/// order the indices are given, with the same column names as this instance.
///
/// With [asView] set, the selected row objects and the column name list are
/// passed on as they are; otherwise they are routed through [_copied].
DataFrame rowsAt(Iterable<int> indices, {bool asView = true}) {
  final DataMatrix selected = [for (final i in indices) this[i]];
  return _viewOrCopy(_trackedColumnNames.l, selected, asView);
}

DataFrame rowsWhere(List<bool> mask) =>
DataFrame._copied(_columnNames, applyMask(mask).toList());
/// Returns a [DataFrame] made up of the rows produced by `applyMask(mask)`,
/// with the same column names as this instance.
///
/// With [asView] set, the selected rows and the column name list are passed
/// on as they are; otherwise they are routed through [_copied].
// NOTE(review): presumably applyMask keeps the rows whose mask entry is
// true — confirm against its definition.
DataFrame rowsWhere(List<bool> mask, {bool asView = true}) {
  final DataMatrix kept = applyMask(mask).toList();
  return _viewOrCopy(_trackedColumnNames.l, kept, asView);
}

/// Builds the resulting dataframe for the selection methods.
///
/// With [asView] set, [columnNames] and [data] are handed to
/// [DataFrame._default] as they are; otherwise both go through [_copied]
/// first.
DataFrame _viewOrCopy(List<String> columnNames, DataMatrix data, bool asView) {
  if (asView) {
    return DataFrame._default(columnNames, data);
  }
  return _copied(columnNames, data);
}

// **************** manipulation ******************

/// Add a new column to the end of the dataframe. The [records] have to be of the same length
/// as the dataframe.
void addColumn(String name, RecordCol records) {
if (_columnNames.contains(name)) {
if (_trackedColumnNames.contains(name)) {
throw ArgumentError('$name column does already exist');
}

Expand All @@ -276,60 +287,57 @@ class DataFrame extends ListBase<RecordRow> {
'Length of column records does not match the one of the data frame');
}

_columnNames.add(name);
_trackedColumnNames.add(name);
}

/// Removes the column called [name] from the dataframe and returns its
/// records, one per row in row order.
///
/// Throws an [ArgumentError] (raised by [columnIndex]) if no column named
/// [name] is present.
RecordCol removeColumn(String name) {
  final index = columnIndex(name);
  _trackedColumnNames.removeAt(index);
  // `removeAt` yields the removed element; collecting it per row is what
  // makes the returned list hold the column's records instead of nulls.
  return [for (final row in this) row.removeAt(index)];
}

/// Transform the values corresponding to [name] as per [transformElement] in-place.
void transformColumn(
String name, dynamic Function(dynamic element) transformElement) {
this(name).forEachIndexed((i, element) {
void transformColumn(String name, dynamic Function(dynamic element) transformElement) {
columnIterable(name).forEachIndexed((i, element) {
this[i][columnIndex(name)] = transformElement(element);
});
}

/// Add a new row represented by [rowMap] of the structure {columnName: record}
/// to the end of the dataframe.
void addRowFromMap(RecordRowMap rowMap) =>
add([for (final name in _columnNames) rowMap[name]]);
add([for (final name in _trackedColumnNames) rowMap[name]]);

// ************ map representations *************

/// Returns a list of {columnName: value} Map-representations for each row.
List<RecordRowMap> rowMaps() =>
[for (final row in this) Map.fromIterables(_columnNames, row)];
[for (final row in this) Map.fromIterables(_trackedColumnNames, row)];

/// Returns a {columnName: columnData} representation.
Map<String, RecordCol> columnMap() => Map.fromIterable(
_columnNames,
_trackedColumnNames,
value: (name) => this(name),
);

// ************ copying *************

/// Returns a deep copy of the dataframe.
DataFrame copy() => DataFrame._copied(_columnNames, this);
/// Returns a new [DataFrame] built by [_copied] from this instance's
/// [columnNames] and rows.
DataFrame copy() {
  return _copied(columnNames, this);
}

DataFrame._copied(
ElementPositionTrackingList<String> columns, DataMatrix data)
: this._columnNames = columns.copy(),
super(ListListExtensions(data).copy());
/// Creates a [DataFrame] from the results of calling `copy()` on [names]
/// and on [data], so that the passed lists themselves are not handed to the
/// new instance.
// NOTE(review): how deep `copy()` goes (in particular whether the individual
// rows of [data] are duplicated) depends on extensions not visible here.
DataFrame _copied(List<String> names, DataMatrix data) {
  final List<String> namesCopy = names.copy();
  final DataMatrix dataCopy = data.copy();
  return DataFrame._default(namesCopy, dataCopy);
}

// ************** slicing ****************

/// Returns a new, row-sliced dataframe.
DataFrame sliced(int start, [int? end]) =>
DataFrame._copied(_columnNames, sublist(start, end));
/// Returns a new instance containing the rows selected by
/// `sublist(start, end)`, with the same column names as this instance.
///
/// With [asView] set, names and rows are passed on as they are; otherwise
/// they are routed through [_copied].
DataFrame sliced(int start, {int? end, bool asView = true}) {
  final DataMatrix rows = sublist(start, end);
  return _viewOrCopy(columnNames, rows, asView);
}

/// Slice dataframe in-place.
/// Row-slice instance in-place.
void slice(int start, [int? end]) {
if (start != 0) removeRange(0, start);
if (end != null) removeRange(end, length);
Expand All @@ -350,8 +358,8 @@ class DataFrame extends ListBase<RecordRow> {
{bool ascending = true,
bool nullsFirst = true,
Comparator<Record>? compareRecords}) =>
DataFrame._copied(
_columnNames,
_copied(
_trackedColumnNames,
_sort(colName,
inPlace: false,
ascending: ascending,
Expand Down Expand Up @@ -406,9 +414,9 @@ class DataFrame extends ListBase<RecordRow> {

// ************* Object overrides ******************

/// Returns hashCode accounting for both the [_data] and [_columnNames]
/// Returns a hash code combining the hash codes of the row data [l] and of [_trackedColumnNames].
@override
int get hashCode => l.hashCode + _columnNames.hashCode;
int get hashCode => l.hashCode + _trackedColumnNames.hashCode;

@override
bool operator ==(Object other) =>
Expand Down
4 changes: 4 additions & 0 deletions lib/src/utils/element_position_tracking_list.dart
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ class ElementPositionTrackingList<T> extends ListBase<T> {

// *************** overrides *******************

/// Whether [element] is currently held by this list.
///
/// Answered through a key lookup in [_object2Index] instead of walking the
/// elements.
@override
bool contains(Object? element) {
  return _object2Index.containsKey(element);
}

@override
void add(T element) {
super.add(element);
Expand Down
20 changes: 10 additions & 10 deletions test/src/dataframe_test.dart
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ DataFrame _getDF() => DataFrame.fromRowMaps([
void main() {
group('constructors', (){
test('default', () {
expect(() => DataFrame(['b'], []), throwsArgumentError);
expect(() => DataFrame.fromNamesAndData(['b'], []), throwsArgumentError);
expect(
() => DataFrame([
() => DataFrame.fromNamesAndData([
'b'
], [
[888, 1]
Expand Down Expand Up @@ -150,7 +150,7 @@ void main() {
'2 | null 8 ');

final df_with_longer_elements_than_column_names =
DataFrame([
DataFrame.fromNamesAndData([
'a',
'b'
], [
Expand All @@ -170,7 +170,7 @@ void main() {
..slice(0, 30);
expect(df.length, 30);

final sliced = df.sliced(5, 25);
final sliced = df.sliced(5, end: 25);
expect(sliced.length, 20);

// Ensure disentanglement of copied properties
Expand Down Expand Up @@ -213,7 +213,7 @@ void main() {
});

test('data access', () {
final df = DataFrame(
final df = DataFrame.fromNamesAndData(
['a', 'b', 'c'],
[
[43, 'omg', null],
Expand Down Expand Up @@ -254,7 +254,7 @@ void main() {

// .withColumns + .sliced
expect(
df.withColumns(['a', 'b']).sliced(1, 3).toString(),
df.withColumns(['a', 'b']).sliced(1, end: 3).toString(),
' a b \n'
'0 | 701 \n'
'1 | -9 ubiquitous'
Expand Down Expand Up @@ -342,7 +342,7 @@ void main() {
group('.toCsv', () {
test('default', () async {
final outputCsvPath = outputFilePath('out.csv');
final df = DataFrame([
final df = DataFrame.fromNamesAndData([
'a',
'b',
'c'
Expand All @@ -358,7 +358,7 @@ void main() {

test('with null', () async {
final outputCsvPath = outputFilePath('out1.csv');
final df = DataFrame([
final df = DataFrame.fromNamesAndData([
'a',
'b',
'c'
Expand All @@ -382,7 +382,7 @@ void main() {

test('with single quote including strings', () async {
final outputCsvPath = outputFilePath('out3.csv');
final df = DataFrame([
final df = DataFrame.fromNamesAndData([
'a',
'b',
'c'
Expand All @@ -398,7 +398,7 @@ void main() {

test('without header', () async {
final outputCsvPath = outputFilePath('out4.csv');
final df = DataFrame([
final df = DataFrame.fromNamesAndData([
'a',
'b',
'c'
Expand Down

0 comments on commit 44a79c3

Please sign in to comment.