Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make ColumnInference serializable #5611

Merged
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
c073e6b
upgrade to 3.1
LittleLittleCloud Apr 28, 2020
8f0fc1a
write inline data using invariantCulture
LittleLittleCloud Apr 28, 2020
e96d716
Merge branch 'master' of https://github.com/dotnet/machinelearning
LittleLittleCloud Apr 29, 2020
8c17bbe
Merge branch 'master' of https://github.com/dotnet/machinelearning
LittleLittleCloud May 14, 2020
b2947f5
Merge branch 'master' of https://github.com/dotnet/machinelearning
LittleLittleCloud May 18, 2020
ff1c909
Merge branch 'master' of https://github.com/dotnet/machinelearning
LittleLittleCloud May 20, 2020
dad9055
Merge branch 'master' of https://github.com/dotnet/machinelearning
LittleLittleCloud Jul 20, 2020
741d77a
Merge branch 'master' of https://github.com/dotnet/machinelearning
LittleLittleCloud Jul 21, 2020
594c828
Merge branch 'master' of https://github.com/dotnet/machinelearning
LittleLittleCloud Aug 26, 2020
cc4def8
Merge branch 'master' of https://github.com/dotnet/machinelearning
LittleLittleCloud Sep 14, 2020
0610a9b
Merge branch 'master' of https://github.com/dotnet/machinelearning
LittleLittleCloud Oct 12, 2020
6190825
Merge branch 'master' of https://github.com/dotnet/machinelearning
LittleLittleCloud Oct 13, 2020
c2edc76
Merge branch 'master' of https://github.com/dotnet/machinelearning
LittleLittleCloud Dec 10, 2020
6e0ca4f
Merge branch 'master' of https://github.com/dotnet/machinelearning
LittleLittleCloud Feb 2, 2021
3d3ba08
make column inference serializable
LittleLittleCloud Feb 2, 2021
4a82531
add test json
LittleLittleCloud Feb 2, 2021
6465ab9
add approvaltests
LittleLittleCloud Feb 2, 2021
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 13 additions & 5 deletions src/Microsoft.ML.AutoML/API/ColumnInference.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
using System.Collections.Generic;
using System.Collections.ObjectModel;
using Microsoft.ML.Data;
using Newtonsoft.Json;

namespace Microsoft.ML.AutoML
{
Expand All @@ -20,6 +21,7 @@ public sealed class ColumnInferenceResults
/// Can be used to instantiate a new <see cref="TextLoader" /> to load
/// data into an <see cref="IDataView" />.
/// </remarks>
[JsonProperty(DefaultValueHandling = DefaultValueHandling.Include)]
public TextLoader.Options TextLoaderOptions { get; internal set; }

/// <summary>
Expand All @@ -31,6 +33,7 @@ public sealed class ColumnInferenceResults
/// See <see cref="ExperimentBase{TMetrics, TExperimentSettings}.Execute(IDataView, ColumnInformation, IEstimator{ITransformer}, System.IProgress{RunDetail{TMetrics}})" />
/// for example.</para>
/// </remarks>
[JsonProperty(DefaultValueHandling = DefaultValueHandling.Include)]
public ColumnInformation ColumnInformation { get; internal set; }
}

Expand Down Expand Up @@ -90,31 +93,36 @@ public sealed class ColumnInformation
/// <remarks>
/// Categorical data columns should generally be columns that contain a small number of unique values.
/// </remarks>
public ICollection<string> CategoricalColumnNames { get; }
[JsonProperty]
public ICollection<string> CategoricalColumnNames { get; private set; }

/// <summary>
/// The dataset columns that are numeric.
/// </summary>
/// <value>The default value is a new, empty <see cref="Collection{String}"/>.</value>
public ICollection<string> NumericColumnNames { get; }
[JsonProperty]
public ICollection<string> NumericColumnNames { get; private set; }

/// <summary>
/// The dataset columns that are text.
/// </summary>
/// <value>The default value is a new, empty <see cref="Collection{String}"/>.</value>
public ICollection<string> TextColumnNames { get; }
[JsonProperty]
public ICollection<string> TextColumnNames { get; private set; }

/// <summary>
/// The dataset columns that AutoML should ignore.
/// </summary>
/// <value>The default value is a new, empty <see cref="Collection{String}"/>.</value>
public ICollection<string> IgnoredColumnNames { get; }
[JsonProperty]
public ICollection<string> IgnoredColumnNames { get; private set; }

/// <summary>
/// The dataset columns that are image paths.
/// </summary>
/// <value>The default value is a new, empty <see cref="Collection{String}"/>.</value>
public ICollection<string> ImagePathColumnNames { get; }
[JsonProperty]
public ICollection<string> ImagePathColumnNames { get; private set; }

public ColumnInformation()
{
Expand Down
2 changes: 1 addition & 1 deletion src/Microsoft.ML.AutoML/Assembly.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,5 @@
[assembly: InternalsVisibleTo("Microsoft.ML.ModelBuilder, PublicKey=002400000480000094000000060200000024000052534131000400000100010007d1fa57c4aed9f0a32e84aa0faefd0de9e8fd6aec8f87fb03766c834c99921eb23be79ad9d5dcc1dd9ad236132102900b723cf980957fc4e177108fc607774f29e8320e92ea05ece4e821c0a5efe8f1645c4c0c93c1ab99285d622caa652c1dfad63d745d6f2de5f17e5eaf0fc4963d261c8a12436518206dc093344d5ad293")]
[assembly: InternalsVisibleTo("Microsoft.ML.ModelBuilder.AutoMLService, PublicKey=002400000480000094000000060200000024000052534131000400000100010007d1fa57c4aed9f0a32e84aa0faefd0de9e8fd6aec8f87fb03766c834c99921eb23be79ad9d5dcc1dd9ad236132102900b723cf980957fc4e177108fc607774f29e8320e92ea05ece4e821c0a5efe8f1645c4c0c93c1ab99285d622caa652c1dfad63d745d6f2de5f17e5eaf0fc4963d261c8a12436518206dc093344d5ad293")]
[assembly: InternalsVisibleTo("Microsoft.ML.ModelBuilder.AutoMLService.Gpu, PublicKey=002400000480000094000000060200000024000052534131000400000100010007d1fa57c4aed9f0a32e84aa0faefd0de9e8fd6aec8f87fb03766c834c99921eb23be79ad9d5dcc1dd9ad236132102900b723cf980957fc4e177108fc607774f29e8320e92ea05ece4e821c0a5efe8f1645c4c0c93c1ab99285d622caa652c1dfad63d745d6f2de5f17e5eaf0fc4963d261c8a12436518206dc093344d5ad293")]
[assembly: InternalsVisibleTo("Microsoft.ML.ModelBuilder.AutoMLService.Test, PublicKey=002400000480000094000000060200000024000052534131000400000100010007d1fa57c4aed9f0a32e84aa0faefd0de9e8fd6aec8f87fb03766c834c99921eb23be79ad9d5dcc1dd9ad236132102900b723cf980957fc4e177108fc607774f29e8320e92ea05ece4e821c0a5efe8f1645c4c0c93c1ab99285d622caa652c1dfad63d745d6f2de5f17e5eaf0fc4963d261c8a12436518206dc093344d5ad293")]
[assembly: InternalsVisibleTo("Microsoft.ML.ModelBuilder.AutoMLService.Tests, PublicKey=002400000480000094000000060200000024000052534131000400000100010007d1fa57c4aed9f0a32e84aa0faefd0de9e8fd6aec8f87fb03766c834c99921eb23be79ad9d5dcc1dd9ad236132102900b723cf980957fc4e177108fc607774f29e8320e92ea05ece4e821c0a5efe8f1645c4c0c93c1ab99285d622caa652c1dfad63d745d6f2de5f17e5eaf0fc4963d261c8a12436518206dc093344d5ad293")]

2 changes: 1 addition & 1 deletion src/Microsoft.ML.CodeGenerator/Assembly.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@
[assembly: InternalsVisibleTo("mlnet.Tests, PublicKey=00240000048000009400000006020000002400005253413100040000010001004b86c4cb78549b34bab61a3b1800e23bfeb5b3ec390074041536a7e3cbd97f5f04cf0f857155a8928eaa29ebfd11cfbbad3ba70efea7bda3226c6a8d370a4cd303f714486b6ebc225985a638471e6ef571cc92a4613c00b8fa65d61ccee0cbe5f36330c9a01f4183559f1bef24cc2917c6d913e3a541333a1d05d9bed22b38cb")]
[assembly: InternalsVisibleTo("Microsoft.ML.ModelBuilder.AutoMLService, PublicKey=002400000480000094000000060200000024000052534131000400000100010007d1fa57c4aed9f0a32e84aa0faefd0de9e8fd6aec8f87fb03766c834c99921eb23be79ad9d5dcc1dd9ad236132102900b723cf980957fc4e177108fc607774f29e8320e92ea05ece4e821c0a5efe8f1645c4c0c93c1ab99285d622caa652c1dfad63d745d6f2de5f17e5eaf0fc4963d261c8a12436518206dc093344d5ad293")]
[assembly: InternalsVisibleTo("Microsoft.ML.ModelBuilder.AutoMLService.Gpu, PublicKey=002400000480000094000000060200000024000052534131000400000100010007d1fa57c4aed9f0a32e84aa0faefd0de9e8fd6aec8f87fb03766c834c99921eb23be79ad9d5dcc1dd9ad236132102900b723cf980957fc4e177108fc607774f29e8320e92ea05ece4e821c0a5efe8f1645c4c0c93c1ab99285d622caa652c1dfad63d745d6f2de5f17e5eaf0fc4963d261c8a12436518206dc093344d5ad293")]
[assembly: InternalsVisibleTo("Microsoft.ML.ModelBuilder.AutoMLService.Test, PublicKey=002400000480000094000000060200000024000052534131000400000100010007d1fa57c4aed9f0a32e84aa0faefd0de9e8fd6aec8f87fb03766c834c99921eb23be79ad9d5dcc1dd9ad236132102900b723cf980957fc4e177108fc607774f29e8320e92ea05ece4e821c0a5efe8f1645c4c0c93c1ab99285d622caa652c1dfad63d745d6f2de5f17e5eaf0fc4963d261c8a12436518206dc093344d5ad293")]
[assembly: InternalsVisibleTo("Microsoft.ML.ModelBuilder.AutoMLService.Tests, PublicKey=002400000480000094000000060200000024000052534131000400000100010007d1fa57c4aed9f0a32e84aa0faefd0de9e8fd6aec8f87fb03766c834c99921eb23be79ad9d5dcc1dd9ad236132102900b723cf980957fc4e177108fc607774f29e8320e92ea05ece4e821c0a5efe8f1645c4c0c93c1ab99285d622caa652c1dfad63d745d6f2de5f17e5eaf0fc4963d261c8a12436518206dc093344d5ad293")]
18 changes: 18 additions & 0 deletions test/Microsoft.ML.AutoML.Tests/ColumnInferenceTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,13 @@
using System.Collections.Generic;
using System.IO;
using System.Linq;
using ApprovalTests;
using ApprovalTests.Namers;
using ApprovalTests.Reporters;
using FluentAssertions;
using Microsoft.ML.Data;
using Microsoft.ML.TestFramework;
using Newtonsoft.Json;
using Xunit;
using Xunit.Abstractions;

Expand Down Expand Up @@ -222,5 +226,19 @@ public void InferColumnsFromMultilineInputFile()
Assert.Equal("description", result.ColumnInformation.TextColumnNames.First());
Assert.Equal("animal", result.ColumnInformation.CategoricalColumnNames.First());
}

/// <summary>
/// Round-trips the stored wiki column-inference JSON through
/// <see cref="ColumnInferenceResults"/> and compares the re-serialized,
/// indented text against the approved snapshot.
/// </summary>
[Fact]
[UseReporter(typeof(DiffReporter))]
[UseApprovalSubdirectory("ApprovalTests")]
public void Wiki_column_inference_result_should_be_serializable()
{
    // Arrange: load the serialized fixture from the test data folder.
    string fixturePath = Path.Combine("TestData", "wiki-column-inference.json");
    string originalJson = File.ReadAllText(fixturePath);

    // Act: deserialize into the result type, then serialize it back.
    ColumnInferenceResults roundTripped = JsonConvert.DeserializeObject<ColumnInferenceResults>(originalJson);
    string reserialized = JsonConvert.SerializeObject(roundTripped, Formatting.Indented);

    // Assert: the output must match the approved file.
    Approvals.Verify(reserialized);
}
}
}
21 changes: 2 additions & 19 deletions test/Microsoft.ML.AutoML.Tests/Microsoft.ML.AutoML.Tests.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -7,30 +7,13 @@
</ItemGroup>

<ItemGroup>
<PackageReference Include="ApprovalTests" Version="5.2.4" />
<PackageReference Include="FluentAssertions" Version="5.10.3" />
<PackageReference Include="SciSharp.TensorFlow.Redist" Version="$(TensorFlowVersion)" />
</ItemGroup>

<ItemGroup>
<None Update="TestData\DatasetWithNewlineBetweenQuotes.txt">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\DatasetWithDefaultColumnNames.txt">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\DatasetWithoutHeader.txt">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\NameColumnIsOnlyFeatureDataset.txt">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\BinaryDatasetWithBoolColumn.txt">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\DatasetWithEmptyColumn.txt">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\TrivialMulticlassDataset.txt">
<None Update="TestData\**\*">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="xunit.runner.json">
Expand Down