5
5
using System ;
6
6
using System . IO ;
7
7
using System . Text ;
8
+ using Apache . Arrow ;
8
9
using Xunit ;
9
10
10
11
namespace Microsoft . Data . Analysis . Tests
11
12
{
12
13
public partial class DataFrameTests
13
14
{
15
/// <summary>
/// Asserts that every column in <paramref name="df"/> is exactly the concrete
/// column class that corresponds to the column's reported <c>DataType</c>.
/// </summary>
/// <param name="df">The data frame whose columns are checked.</param>
/// <param name="testArrowStringColumn">
/// When <c>true</c>, string columns must be <c>ArrowStringDataFrameColumn</c>;
/// when <c>false</c> (the default), they must be <c>StringDataFrameColumn</c>.
/// </param>
/// <exception cref="NotImplementedException">
/// A column reports a data type that has no mapping in this helper.
/// </exception>
internal static void VerifyColumnTypes(DataFrame df, bool testArrowStringColumn = false)
{
    foreach (DataFrameColumn current in df.Columns)
    {
        // The mapping is resolved first, so an unmapped element type throws
        // before any type assertion is attempted.
        Assert.IsType(ExpectedColumnType(current.DataType), current);
    }

    // Maps a column element type to the column class expected to hold it.
    Type ExpectedColumnType(Type elementType)
    {
        if (elementType == typeof(string))
        {
            return testArrowStringColumn
                ? typeof(ArrowStringDataFrameColumn)
                : typeof(StringDataFrameColumn);
        }

        return elementType == typeof(bool) ? typeof(BooleanDataFrameColumn)
            : elementType == typeof(decimal) ? typeof(DecimalDataFrameColumn)
            : elementType == typeof(byte) ? typeof(ByteDataFrameColumn)
            : elementType == typeof(char) ? typeof(CharDataFrameColumn)
            : elementType == typeof(double) ? typeof(DoubleDataFrameColumn)
            : elementType == typeof(float) ? typeof(SingleDataFrameColumn)
            : elementType == typeof(int) ? typeof(Int32DataFrameColumn)
            : elementType == typeof(long) ? typeof(Int64DataFrameColumn)
            : elementType == typeof(sbyte) ? typeof(SByteDataFrameColumn)
            : elementType == typeof(short) ? typeof(Int16DataFrameColumn)
            : elementType == typeof(uint) ? typeof(UInt32DataFrameColumn)
            : elementType == typeof(ulong) ? typeof(UInt64DataFrameColumn)
            : elementType == typeof(ushort) ? typeof(UInt16DataFrameColumn)
            : throw new NotImplementedException("Unit test has to be updated");
    }
}
102
+
14
103
[ Fact ]
15
104
public void TestReadCsvWithHeader ( )
16
105
{
@@ -28,11 +117,13 @@ Stream GetStream(string streamData)
28
117
Assert . Equal ( 4 , df . Rows . Count ) ;
29
118
Assert . Equal ( 7 , df . Columns . Count ) ;
30
119
Assert . Equal ( "CMT" , df . Columns [ "vendor_id" ] [ 3 ] ) ;
120
+ VerifyColumnTypes ( df ) ;
31
121
32
122
DataFrame reducedRows = DataFrame . LoadCsv ( GetStream ( data ) , numberOfRowsToRead : 3 ) ;
33
123
Assert . Equal ( 3 , reducedRows . Rows . Count ) ;
34
124
Assert . Equal ( 7 , reducedRows . Columns . Count ) ;
35
125
Assert . Equal ( "CMT" , reducedRows . Columns [ "vendor_id" ] [ 2 ] ) ;
126
+ VerifyColumnTypes ( df ) ;
36
127
}
37
128
38
129
[ Fact ]
@@ -51,11 +142,13 @@ Stream GetStream(string streamData)
51
142
Assert . Equal ( 4 , df . Rows . Count ) ;
52
143
Assert . Equal ( 7 , df . Columns . Count ) ;
53
144
Assert . Equal ( "CMT" , df . Columns [ "Column0" ] [ 3 ] ) ;
145
+ VerifyColumnTypes ( df ) ;
54
146
55
147
DataFrame reducedRows = DataFrame . LoadCsv ( GetStream ( data ) , header : false , numberOfRowsToRead : 3 ) ;
56
148
Assert . Equal ( 3 , reducedRows . Rows . Count ) ;
57
149
Assert . Equal ( 7 , reducedRows . Columns . Count ) ;
58
150
Assert . Equal ( "CMT" , reducedRows . Columns [ "Column0" ] [ 2 ] ) ;
151
+ VerifyColumnTypes ( df ) ;
59
152
}
60
153
61
154
[ Fact ]
@@ -83,6 +176,7 @@ Stream GetStream(string streamData)
83
176
Assert . True ( typeof ( float ) == df . Columns [ 4 ] . DataType ) ;
84
177
Assert . True ( typeof ( string ) == df . Columns [ 5 ] . DataType ) ;
85
178
Assert . True ( typeof ( double ) == df . Columns [ 6 ] . DataType ) ;
179
+ VerifyColumnTypes ( df ) ;
86
180
87
181
foreach ( var column in df . Columns )
88
182
{
@@ -124,11 +218,13 @@ Stream GetStream(string streamData)
124
218
Assert . Equal ( 5 , df . Rows . Count ) ;
125
219
Assert . Equal ( 7 , df . Columns . Count ) ;
126
220
Assert . Equal ( "CMT" , df . Columns [ "vendor_id" ] [ 4 ] ) ;
221
+ VerifyColumnTypes ( df ) ;
127
222
128
223
DataFrame reducedRows = DataFrame . LoadCsv ( GetStream ( data ) , separator : '|' , numberOfRowsToRead : 3 ) ;
129
224
Assert . Equal ( 3 , reducedRows . Rows . Count ) ;
130
225
Assert . Equal ( 7 , reducedRows . Columns . Count ) ;
131
226
Assert . Equal ( "CMT" , reducedRows . Columns [ "vendor_id" ] [ 2 ] ) ;
227
+ VerifyColumnTypes ( df ) ;
132
228
133
229
var nullRow = df . Rows [ 3 ] ;
134
230
Assert . Equal ( "" , nullRow [ 0 ] ) ;
@@ -159,11 +255,13 @@ Stream GetStream(string streamData)
159
255
Assert . Equal ( 5 , df . Rows . Count ) ;
160
256
Assert . Equal ( 7 , df . Columns . Count ) ;
161
257
Assert . Equal ( "CMT" , df . Columns [ "vendor_id" ] [ 4 ] ) ;
258
+ VerifyColumnTypes ( df ) ;
162
259
163
260
DataFrame reducedRows = DataFrame . LoadCsv ( GetStream ( data ) , separator : ';' , numberOfRowsToRead : 3 ) ;
164
261
Assert . Equal ( 3 , reducedRows . Rows . Count ) ;
165
262
Assert . Equal ( 7 , reducedRows . Columns . Count ) ;
166
263
Assert . Equal ( "CMT" , reducedRows . Columns [ "vendor_id" ] [ 2 ] ) ;
264
+ VerifyColumnTypes ( df ) ;
167
265
168
266
var nullRow = df . Rows [ 3 ] ;
169
267
Assert . Equal ( "" , nullRow [ 0 ] ) ;
@@ -193,11 +291,13 @@ Stream GetStream(string streamData)
193
291
Assert . Equal ( 4 , df . Rows . Count ) ;
194
292
Assert . Equal ( 7 , df . Columns . Count ) ;
195
293
Assert . Equal ( "CMT" , df . Columns [ "vendor_id" ] [ 3 ] ) ;
294
+ VerifyColumnTypes ( df ) ;
196
295
197
296
DataFrame reducedRows = DataFrame . LoadCsv ( GetStream ( data ) , numberOfRowsToRead : 3 ) ;
198
297
Assert . Equal ( 3 , reducedRows . Rows . Count ) ;
199
298
Assert . Equal ( 7 , reducedRows . Columns . Count ) ;
200
299
Assert . Equal ( "CMT" , reducedRows . Columns [ "vendor_id" ] [ 2 ] ) ;
300
+ VerifyColumnTypes ( df ) ;
201
301
}
202
302
203
303
[ Fact ]
@@ -235,11 +335,13 @@ Stream GetStream(string streamData)
235
335
Assert . Equal ( 4 , df . Rows . Count ) ;
236
336
Assert . Equal ( 6 , df . Columns . Count ) ;
237
337
Assert . Equal ( "CMT" , df . Columns [ "vendor_id" ] [ 3 ] ) ;
338
+ VerifyColumnTypes ( df ) ;
238
339
239
340
DataFrame reducedRows = DataFrame . LoadCsv ( GetStream ( data ) , numberOfRowsToRead : 3 ) ;
240
341
Assert . Equal ( 3 , reducedRows . Rows . Count ) ;
241
342
Assert . Equal ( 6 , reducedRows . Columns . Count ) ;
242
343
Assert . Equal ( "CMT" , reducedRows . Columns [ "vendor_id" ] [ 2 ] ) ;
344
+ VerifyColumnTypes ( df ) ;
243
345
244
346
}
245
347
}
0 commit comments