Skip to content

Commit e5a19af

Browse files
add hasHeader to ColumnInference (#4922)
1 parent ae1b709 commit e5a19af

File tree

4 files changed

+56
-3
lines changed

4 files changed

+56
-3
lines changed

src/Microsoft.ML.AutoML/ColumnInference/ColumnInferenceApi.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,12 +36,12 @@ public static ColumnInferenceResults InferColumns(MLContext context, string path
3636
}
3737

3838
// Diff hunk for the path-based InferColumns overload. The "-" lines are the old
// version and the "+" lines are the new one. The change appends an optional
// trailing parameter, hasHeader (default true), to the signature.
public static ColumnInferenceResults InferColumns(MLContext context, string path, ColumnInformation columnInfo,
39-
char? separatorChar, bool? allowQuotedStrings, bool? supportSparse, bool trimWhitespace, bool groupColumns)
39+
char? separatorChar, bool? allowQuotedStrings, bool? supportSparse, bool trimWhitespace, bool groupColumns, bool hasHeader = true)
4040
{
4141
var sample = TextFileSample.CreateFromFullFile(path);
4242
var splitInference = InferSplit(context, sample, separatorChar, allowQuotedStrings, supportSparse);
// Previously both calls below were passed a hard-coded true in the header position;
// the new lines forward the caller-supplied hasHeader value instead.
43-
var typeInference = InferColumnTypes(context, sample, splitInference, true, null, columnInfo.LabelColumnName);
44-
return InferColumns(context, path, columnInfo, true, splitInference, typeInference, trimWhitespace, groupColumns);
43+
var typeInference = InferColumnTypes(context, sample, splitInference, hasHeader, null, columnInfo.LabelColumnName);
44+
return InferColumns(context, path, columnInfo, hasHeader, splitInference, typeInference, trimWhitespace, groupColumns);
4545
}
4646

4747
public static ColumnInferenceResults InferColumns(MLContext context, string path, ColumnInformation columnInfo, bool hasHeader,

test/Microsoft.ML.AutoML.Tests/ColumnInferenceTests.cs

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
using System;
2+
using System.Collections.Generic;
23
using System.IO;
34
using System.Linq;
45
using Microsoft.ML.Data;
@@ -91,6 +92,35 @@ public void DatasetWithBoolColumn()
9192
Assert.Equal(DefaultColumnNames.Label, result.ColumnInformation.LabelColumnName);
9293
}
9394

95+
[Fact]
// Checks column inference on TestData/DatasetWithoutHeader.txt, a comma-separated
// file with no header row, by calling InferColumns with hasHeader set to false.
96+
public void InferDatasetWithoutHeader()
97+
{
98+
var context = new MLContext(1);
99+
var filePath = Path.Combine("TestData", "DatasetWithoutHeader.txt");
// Columns are referred to as "col0", "col1", ... since the file carries no names.
// NOTE(review): presumably these are the names inference generates for
// header-less input; confirm against the inference code.
100+
var columnInfo = new ColumnInformation()
101+
{
102+
LabelColumnName = "col0",
103+
UserIdColumnName = "col1",
104+
ItemIdColumnName = "col2",
105+
};
106+
columnInfo.IgnoredColumnNames.Add("col4");
// Positional arguments after columnInfo: separatorChar ',', allowQuotedStrings null,
// supportSparse null, trimWhitespace false, groupColumns false, hasHeader false.
107+
var result = ColumnInferenceApi.InferColumns(context, filePath, columnInfo, ',', null, null, false, false, false);
// Each data row has six comma-separated fields, so six loader columns are expected.
108+
Assert.Equal(6, result.TextLoaderOptions.Columns.Count());
109+
110+
var labelColumn = result.TextLoaderOptions.Columns.First(c => c.Name == "col0");
111+
var userColumn = result.TextLoaderOptions.Columns.First(c => c.Name == "col1");
112+
var itemColumn = result.TextLoaderOptions.Columns.First(c => c.Name == "col2");
113+
var ignoreColumn = result.TextLoaderOptions.Columns.First(c => c.Name == "col4");
114+
// col0 holds text codes (CMT/VTS) in the data file; col1, col2 and col4 hold numbers.
115+
Assert.Equal(DataKind.String, labelColumn.DataKind);
116+
Assert.Equal(DataKind.Single, userColumn.DataKind);
117+
Assert.Equal(DataKind.Single, itemColumn.DataKind);
118+
Assert.Equal(DataKind.Single, ignoreColumn.DataKind);
119+
// Exactly one column should be reported as categorical and none as text.
// NOTE(review): presumably the categorical one is col3 (CRD/CSH); the test does not assert which.
120+
Assert.Single(result.ColumnInformation.CategoricalColumnNames);
121+
Assert.Empty(result.ColumnInformation.TextColumnNames);
122+
}
123+
94124
[Fact]
95125
public void WhereNameColumnIsOnlyFeature()
96126
{

test/Microsoft.ML.AutoML.Tests/Microsoft.ML.AutoML.Tests.csproj

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@
1414
<None Update="TestData\DatasetWithDefaultColumnNames.txt">
1515
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
1616
</None>
17+
<None Update="TestData\DatasetWithoutHeader.txt">
18+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
19+
</None>
1720
<None Update="TestData\NameColumnIsOnlyFeatureDataset.txt">
1821
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
1922
</None>
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
CMT,1271,3.8,CRD,17.5,T
2+
CMT,474,1.5,CRD,8,T
3+
CMT,637,1.4,CRD,8.5,T
4+
CMT,181,0.6,CSH,4.5,T
5+
CMT,661,1.1,CRD,8.5,T
6+
CMT,935,9.6,CSH,27.5,T
7+
CMT,869,2.3,CRD,11.5,T
8+
CMT,454,1.4,CRD,7.5,T
9+
CMT,366,1.5,CSH,7.5,T
10+
VTS,1140,5.61,CSH,18.5,F
11+
VTS,120,0.67,CSH,4,F
12+
VTS,240,1.7,CRD,6.5,F
13+
VTS,660,2.52,CRD,10.5,F
14+
VTS,420,0.82,CSH,6,F
15+
VTS,420,1.04,CRD,6.5,F
16+
VTS,2280,18,CRD,52,F
17+
VTS,360,1.2,CRD,6.5,F
18+
VTS,660,2.22,CSH,10,F
19+
VTS,840,3.29,CSH,12.5,F
20+
VTS,540,1.85,CRD,8.5,F

0 commit comments

Comments
 (0)