Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -257,77 +257,12 @@ private string GenerateConsumeModelCSFileContent(string namespaceValue)
return (trainerString, trainerUsings);
}

/// <summary>
/// Utilize <see cref="ML.CodeGenerator.Utilities.Utils.GenerateClassLabels(ColumnInferenceResults, IDictionary{string, CodeGeneratorSettings.ColumnMapping})"/>
/// </summary>
internal IList<string> GenerateClassLabels(IDictionary<string, CodeGeneratorSettings.ColumnMapping> columnMapping = default)
Copy link
Contributor Author

@mstfbl mstfbl May 29, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've decided to keep this GenerateClassLabels as it is possible that a CodeGenerator instance can call this function, as it does below:

var actualLabels = codeGenerator.GenerateClassLabels();

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks!

{
IList<string> result = new List<string>();
foreach (var column in _columnInferenceResult.TextLoaderOptions.Columns)
{
StringBuilder sb = new StringBuilder();
int range = (column.Source[0].Max - column.Source[0].Min).Value;
bool isArray = range > 0;
sb.Append(Symbols.PublicSymbol);
sb.Append(Symbols.Space);

// if column is in columnMapping, use the type and name in that
DataKind dataKind;
string columnName;

if (columnMapping != null && columnMapping.ContainsKey(column.Name))
{
dataKind = columnMapping[column.Name].ColumnType;
columnName = columnMapping[column.Name].ColumnName;
}
else
{
dataKind = column.DataKind;
columnName = column.Name;
}
switch (dataKind)
{
case Microsoft.ML.Data.DataKind.String:
sb.Append(Symbols.StringSymbol);
break;
case Microsoft.ML.Data.DataKind.Boolean:
sb.Append(Symbols.BoolSymbol);
break;
case Microsoft.ML.Data.DataKind.Single:
sb.Append(Symbols.FloatSymbol);
break;
case Microsoft.ML.Data.DataKind.Double:
sb.Append(Symbols.DoubleSymbol);
break;
case Microsoft.ML.Data.DataKind.Int32:
sb.Append(Symbols.IntSymbol);
break;
case Microsoft.ML.Data.DataKind.UInt32:
sb.Append(Symbols.UIntSymbol);
break;
case Microsoft.ML.Data.DataKind.Int64:
sb.Append(Symbols.LongSymbol);
break;
case Microsoft.ML.Data.DataKind.UInt64:
sb.Append(Symbols.UlongSymbol);
break;
default:
throw new ArgumentException($"The data type '{column.DataKind}' is not handled currently.");
}

if (range > 0)
{
result.Add($"[ColumnName(\"{columnName}\"),LoadColumn({column.Source[0].Min}, {column.Source[0].Max}) VectorType({(range + 1)})]");
sb.Append("[]");
}
else
{
result.Add($"[ColumnName(\"{columnName}\"), LoadColumn({column.Source[0].Min})]");
}
sb.Append(" ");
sb.Append(Utils.Normalize(column.Name));
sb.Append("{get; set;}");
result.Add(sb.ToString());
result.Add("\r\n");
}
return result;
return Utils.GenerateClassLabels(_columnInferenceResult, columnMapping);
}

#region Model project
Expand Down
78 changes: 46 additions & 32 deletions src/Microsoft.ML.CodeGenerator/Utils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
using System.Linq;
using System.Reflection;
using System.Text;
using System.Text.RegularExpressions;
using Microsoft.CodeAnalysis;
using Microsoft.CodeAnalysis.CSharp;
using Microsoft.CodeAnalysis.Formatting;
Expand Down Expand Up @@ -246,6 +247,8 @@ internal static int CreateSolutionFile(string solutionFile, string outputPath)
internal static IList<string> GenerateClassLabels(ColumnInferenceResults columnInferenceResults, IDictionary<string, CodeGeneratorSettings.ColumnMapping> columnMapping = default)
{
IList<string> result = new List<string>();
List<string> normalizedColumnNames = new List<string>();
bool duplicateColumnNamesExist = false;
foreach (var column in columnInferenceResults.TextLoaderOptions.Columns)
{
StringBuilder sb = new StringBuilder();
Expand All @@ -268,37 +271,9 @@ internal static IList<string> GenerateClassLabels(ColumnInferenceResults columnI
dataKind = column.DataKind;
columnName = column.Name;
}
switch (dataKind)
{
case Microsoft.ML.Data.DataKind.String:
sb.Append(Symbols.StringSymbol);
break;
case Microsoft.ML.Data.DataKind.Boolean:
sb.Append(Symbols.BoolSymbol);
break;
case Microsoft.ML.Data.DataKind.Single:
sb.Append(Symbols.FloatSymbol);
break;
case Microsoft.ML.Data.DataKind.Double:
sb.Append(Symbols.DoubleSymbol);
break;
case Microsoft.ML.Data.DataKind.Int32:
sb.Append(Symbols.IntSymbol);
break;
case Microsoft.ML.Data.DataKind.UInt32:
sb.Append(Symbols.UIntSymbol);
break;
case Microsoft.ML.Data.DataKind.Int64:
sb.Append(Symbols.LongSymbol);
break;
case Microsoft.ML.Data.DataKind.UInt64:
sb.Append(Symbols.UlongSymbol);
break;
default:
throw new ArgumentException($"The data type '{column.DataKind}' is not handled currently.");

}
sb.Append(GetSymbolOfDataKind(dataKind));

// Accommodate VectorType (array) columns
if (range > 0)
{
result.Add($"[ColumnName(\"{columnName}\"),LoadColumn({column.Source[0].Min}, {column.Source[0].Max}) VectorType({(range + 1)})]");
Expand All @@ -309,12 +284,51 @@ internal static IList<string> GenerateClassLabels(ColumnInferenceResults columnI
result.Add($"[ColumnName(\"{columnName}\"), LoadColumn({column.Source[0].Min})]");
}
sb.Append(" ");
sb.Append(Utils.Normalize(column.Name));
sb.Append("{get; set;}");
string normalizedColumnName = Utils.Normalize(column.Name);
// Put placeholder for normalized and unique version of column name
if (!duplicateColumnNamesExist && normalizedColumnNames.Contains(normalizedColumnName))
duplicateColumnNamesExist = true;
normalizedColumnNames.Add(normalizedColumnName);
result.Add(sb.ToString());
result.Add("\r\n");
}
for (int i = 1; i < result.Count; i+=3)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently, these labels are generated using Utils.Normalize in GenerateSampleData, and hence do not get the _col_x suffix for conflicting column names. I am opening an issue to track this change. Thanks!

{
// Get normalized column name for correctly typed class property name
// If duplicate column names exist, the only way to ensure all generated column names are unique is to add
// a differentiator depending on the column load order from dataset.
if (duplicateColumnNamesExist)
result[i] += normalizedColumnNames[i/3] + $"_col_{i/3}";
else
result[i] += normalizedColumnNames[i/3];
result[i] += "{get; set;}";
}
return result;
}

/// <summary>
/// Translates a <see cref="DataKind"/> into the corresponding type symbol defined on <see cref="Symbols"/>.
/// </summary>
/// <param name="dataKind">The data kind to look up.</param>
/// <returns>The symbol associated with <paramref name="dataKind"/>.</returns>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="dataKind"/> is not String, Boolean, Single, Double, Int32, UInt32, Int64 or UInt64.
/// </exception>
internal static string GetSymbolOfDataKind(DataKind dataKind)
{
    string symbol;

    switch (dataKind)
    {
        // Text and boolean kinds.
        case DataKind.String:
            symbol = Symbols.StringSymbol;
            break;
        case DataKind.Boolean:
            symbol = Symbols.BoolSymbol;
            break;

        // Floating-point kinds.
        case DataKind.Single:
            symbol = Symbols.FloatSymbol;
            break;
        case DataKind.Double:
            symbol = Symbols.DoubleSymbol;
            break;

        // 32-bit and 64-bit integer kinds.
        case DataKind.Int32:
            symbol = Symbols.IntSymbol;
            break;
        case DataKind.UInt32:
            symbol = Symbols.UIntSymbol;
            break;
        case DataKind.Int64:
            symbol = Symbols.LongSymbol;
            break;
        case DataKind.UInt64:
            symbol = Symbols.UlongSymbol;
            break;

        // Any other kind has no symbol mapping here.
        default:
            throw new ArgumentException($"The data type '{dataKind}' is not handled currently.");
    }

    return symbol;
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
//*****************************************************************************************
//* *
//* This is an auto-generated file by Microsoft ML.NET CLI (Command-Line Interface) tool. *
//* *
//*****************************************************************************************

using Microsoft.ML.Data;

namespace test.Model
{
/// <summary>
/// Input schema with eight columns. Each property is tagged with the column name it maps to
/// (ColumnName) and the source column index it is loaded from (LoadColumn, 0 through 7 here).
/// </summary>
/// <remarks>
/// Every property name ends in a <c>_col_&lt;index&gt;</c> suffix equal to its LoadColumn index.
/// Without that suffix the properties for columns 3 and 4 (both <c>Model</c>) and for columns
/// 5 and 6 (both <c>Work_category</c>) would have identical names.
/// Column 6 is named "Work category"; all other columns are named "input_&lt;index&gt;".
/// </remarks>
public class ModelInput
{
[ColumnName("input_0"), LoadColumn(0)]
public int Id_col_0 { get; set; }


[ColumnName("input_1"), LoadColumn(1)]
public int MsAssetNum_col_1 { get; set; }


[ColumnName("input_2"), LoadColumn(2)]
public string Make_col_2 { get; set; }


// Columns 3 and 4 share the base property name "Model"; only the suffix tells them apart.
[ColumnName("input_3"), LoadColumn(3)]
public string Model_col_3 { get; set; }


[ColumnName("input_4"), LoadColumn(4)]
public double Model_col_4 { get; set; }


// Columns 5 and 6 share the base property name "Work_category"; only the suffix tells them apart.
[ColumnName("input_5"), LoadColumn(5)]
public string Work_category_col_5 { get; set; }


[ColumnName("Work category"), LoadColumn(6)]
public int Work_category_col_6 { get; set; }


[ColumnName("input_7"), LoadColumn(7)]
public bool IsDetachable_col_7 { get; set; }


}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
//*****************************************************************************************
//* *
//* This is an auto-generated file by Microsoft ML.NET CLI (Command-Line Interface) tool. *
//* *
//*****************************************************************************************

using Microsoft.ML.Data;

namespace test.Model
{
/// <summary>
/// Input schema with eight columns. Each property is tagged with the column name it maps to
/// (ColumnName) and the source column index it is loaded from (LoadColumn, 0 through 7 here).
/// </summary>
/// <remarks>
/// Every property name ends in a <c>_col_&lt;index&gt;</c> suffix equal to its LoadColumn index.
/// The column names "Model"/"model" and "work category"/"Work category" differ only by letter
/// case, and their property names (<c>Model</c>, <c>Work_category</c>) would be identical
/// without the suffix.
/// </remarks>
public class ModelInput
{
[ColumnName("id"), LoadColumn(0)]
public int Id_col_0 { get; set; }


[ColumnName("MsAssetNum"), LoadColumn(1)]
public int MsAssetNum_col_1 { get; set; }


[ColumnName("Make"), LoadColumn(2)]
public string Make_col_2 { get; set; }


// "Model" (column 3) and "model" (column 4) both use the base property name "Model".
[ColumnName("Model"), LoadColumn(3)]
public string Model_col_3 { get; set; }


[ColumnName("model"), LoadColumn(4)]
public double Model_col_4 { get; set; }


// "work category" (column 5) and "Work category" (column 6) both use the base property name "Work_category".
[ColumnName("work category"), LoadColumn(5)]
public string Work_category_col_5 { get; set; }


[ColumnName("Work category"), LoadColumn(6)]
public int Work_category_col_6 { get; set; }


[ColumnName("IsDetachable"), LoadColumn(7)]
public bool IsDetachable_col_7 { get; set; }


}
}
Loading