Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace DV data type system with .NET standard type system. #863

Merged
merged 44 commits into from
Sep 19, 2018
Merged
Show file tree
Hide file tree
Changes from 40 commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
67f96a7
Replace DvDateTime, DvDateTimeZone, DvTimeSpan with .NET standard dat…
codemzs Sep 7, 2018
f13ba07
Replace DvInt1, DvInt2, DvInt4 and DvInt8 with .NET standard datatypes.
codemzs Sep 7, 2018
7d115d1
Replace DvBool with .NET standard datatype.
codemzs Sep 7, 2018
2a8eeb5
Replace DvText with ReadOnlyMemory<char>.
codemzs Sep 7, 2018
8cb201d
Merge branch 'DvInts' of https://github.com/codemzs/machinelearning i…
codemzs Sep 7, 2018
92106bf
merge DvInts into DvText
codemzs Sep 7, 2018
300253b
Merge branch 'DvBool2' of https://github.com/codemzs/machinelearning …
codemzs Sep 7, 2018
dca6f24
merge DvBool.
codemzs Sep 7, 2018
771f01d
Merge branch 'DvDateTime3' of https://github.com/codemzs/machinelearn…
codemzs Sep 7, 2018
323a40a
merge DvDateTime et al.
codemzs Sep 7, 2018
12cc1a5
fix tests.
codemzs Sep 7, 2018
0511d74
Disable DataTypes test on linux temporarily.
codemzs Sep 8, 2018
c39b6e6
PR feedback.
codemzs Sep 10, 2018
d435982
cache span object in text loader to improve performance.
codemzs Sep 11, 2018
b5d3f67
Change ROM<char> to ROS<char> in DoubleParser and some misc. PR comme…
codemzs Sep 11, 2018
2d3d03d
PR feedback.
codemzs Sep 12, 2018
30c0ba8
PR feedback.
codemzs Sep 12, 2018
8e8541c
Merge branch 'master' of https://github.com/dotnet/machinelearning in…
codemzs Sep 12, 2018
4f23011
merge master.
codemzs Sep 12, 2018
78e65fb
clean up and some caching.
codemzs Sep 12, 2018
9fa9060
some more caching.
codemzs Sep 12, 2018
8c80530
Merge branch 'master' of https://github.com/dotnet/machinelearning in…
codemzs Sep 12, 2018
d45bc2c
Merge master.
codemzs Sep 12, 2018
25c3144
Merge branch 'master' of https://github.com/dotnet/machinelearning in…
codemzs Sep 13, 2018
c28a2f5
perf improvement.
codemzs Sep 13, 2018
22e0cd0
performance improvements.
codemzs Sep 13, 2018
bf2cd7a
Improve StringBuilder.Append(Span) perf
eerhardt Sep 13, 2018
2753d96
Split using IndexOf.
eerhardt Sep 13, 2018
48a38fe
AddLowerCaseToStringBuilder now accepts Span
eerhardt Sep 14, 2018
6b84a22
Merge branch 'master' of https://github.com/dotnet/machinelearning in…
codemzs Sep 18, 2018
01d7533
merge from master.
codemzs Sep 18, 2018
bd2dc3c
Merge branch 'master' of https://github.com/dotnet/machinelearning in…
codemzs Sep 18, 2018
000af4b
Merge branch 'master' of https://github.com/dotnet/machinelearning in…
codemzs Sep 19, 2018
7c8b07e
PR feedback.
codemzs Sep 19, 2018
c7e62ba
cleanup.
codemzs Sep 19, 2018
8a3a7f5
Merge branch 'master' of https://github.com/dotnet/machinelearning in…
codemzs Sep 19, 2018
33e4b3e
merge from master.
codemzs Sep 19, 2018
44b33b8
Merge branch 'master' of https://github.com/dotnet/machinelearning in…
codemzs Sep 19, 2018
d3cc8c4
misc.
codemzs Sep 19, 2018
0b68409
fix failed test due to merge.
codemzs Sep 19, 2018
201349e
Merge branch 'master' of https://github.com/dotnet/machinelearning in…
codemzs Sep 19, 2018
942cb6c
PR feedback.
codemzs Sep 19, 2018
ededeef
PR feedback.
codemzs Sep 19, 2018
4d82de0
PR feedback.
codemzs Sep 19, 2018
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion build/Dependencies.props
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
<BenchmarkDotNetVersion>0.11.1</BenchmarkDotNetVersion>
<TensorFlowVersion>1.10.0</TensorFlowVersion>
<SystemCollectionsImmutableVersion>1.5.0</SystemCollectionsImmutableVersion>

<SystemMemoryVersion>4.5.1</SystemMemoryVersion>
<MicrosoftCodeAnalysisCSharpVersion>2.9.0</MicrosoftCodeAnalysisCSharpVersion>
<MicrosoftCSharpVersion>4.5.0</MicrosoftCSharpVersion>
<SystemCompositionVersion>1.2.0</SystemCompositionVersion>
Expand Down
1 change: 1 addition & 0 deletions pkg/Microsoft.ML/Microsoft.ML.nupkgproj
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
<PackageReference Include="System.Reflection.Emit.Lightweight" Version="$(SystemReflectionEmitLightweightPackageVersion)" />
<PackageReference Include="System.Threading.Tasks.Dataflow" Version="$(SystemThreadingTasksDataflowPackageVersion)" />
<PackageReference Include="System.CodeDom" Version="$(SystemCodeDomPackageVersion)" />
<PackageReference Include="System.Memory" Version="$(SystemMemoryVersion)" />
</ItemGroup>

<ItemGroup>
Expand Down
5 changes: 2 additions & 3 deletions src/Microsoft.ML.Api/ApiUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,10 @@ private static OpCode GetAssignmentOpCode(Type t)
{
// REVIEW: This should be a Dictionary<Type, OpCode> based solution.
// DvTypes, strings, arrays, all nullable types, VBuffers and UInt128.
if (t == typeof(DvInt8) || t == typeof(DvInt4) || t == typeof(DvInt2) || t == typeof(DvInt1) ||
t == typeof(DvBool) || t == typeof(DvText) || t == typeof(string) || t.IsArray ||
if (t == typeof(ReadOnlyMemory<char>) || t == typeof(string) || t.IsArray ||
(t.IsGenericType && t.GetGenericTypeDefinition() == typeof(VBuffer<>)) ||
(t.IsGenericType && t.GetGenericTypeDefinition() == typeof(Nullable<>)) ||
t == typeof(DvDateTime) || t == typeof(DvDateTimeZone) || t == typeof(DvTimeSpan) || t == typeof(UInt128))
t == typeof(DateTime) || t == typeof(DateTimeOffset) || t == typeof(TimeSpan) || t == typeof(UInt128))
{
return OpCodes.Stobj;
}
Expand Down
143 changes: 17 additions & 126 deletions src/Microsoft.ML.Api/DataViewConstructionUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -125,61 +125,11 @@ private Delegate CreateGetter(int index)
if (outputType.IsArray)
{
Ch.Assert(colType.IsVector);
// String[] -> VBuffer<DvText>
// String[] -> VBuffer<ReadOnlyMemory<char>>
if (outputType.GetElementType() == typeof(string))
{
Ch.Assert(colType.ItemType.IsText);
return CreateConvertingArrayGetterDelegate<String, DvText>(index, x => x == null ? DvText.NA : new DvText(x));
}
else if (outputType.GetElementType() == typeof(int))
{
Ch.Assert(colType.ItemType == NumberType.I4);
return CreateConvertingArrayGetterDelegate<int, DvInt4>(index, x => x);
}
else if (outputType.GetElementType() == typeof(int?))
{
Ch.Assert(colType.ItemType == NumberType.I4);
return CreateConvertingArrayGetterDelegate<int?, DvInt4>(index, x => x ?? DvInt4.NA);
}
else if (outputType.GetElementType() == typeof(long))
{
Ch.Assert(colType.ItemType == NumberType.I8);
return CreateConvertingArrayGetterDelegate<long, DvInt8>(index, x => x);
}
else if (outputType.GetElementType() == typeof(long?))
{
Ch.Assert(colType.ItemType == NumberType.I8);
return CreateConvertingArrayGetterDelegate<long?, DvInt8>(index, x => x ?? DvInt8.NA);
}
else if (outputType.GetElementType() == typeof(short))
{
Ch.Assert(colType.ItemType == NumberType.I2);
return CreateConvertingArrayGetterDelegate<short, DvInt2>(index, x => x);
}
else if (outputType.GetElementType() == typeof(short?))
{
Ch.Assert(colType.ItemType == NumberType.I2);
return CreateConvertingArrayGetterDelegate<short?, DvInt2>(index, x => x ?? DvInt2.NA);
}
else if (outputType.GetElementType() == typeof(sbyte))
{
Ch.Assert(colType.ItemType == NumberType.I1);
return CreateConvertingArrayGetterDelegate<sbyte, DvInt1>(index, x => x);
}
else if (outputType.GetElementType() == typeof(sbyte?))
{
Ch.Assert(colType.ItemType == NumberType.I1);
return CreateConvertingArrayGetterDelegate<sbyte?, DvInt1>(index, x => x ?? DvInt1.NA);
}
else if (outputType.GetElementType() == typeof(bool))
{
Ch.Assert(colType.ItemType.IsBool);
return CreateConvertingArrayGetterDelegate<bool, DvBool>(index, x => x);
}
else if (outputType.GetElementType() == typeof(bool?))
{
Ch.Assert(colType.ItemType.IsBool);
return CreateConvertingArrayGetterDelegate<bool?, DvBool>(index, x => x ?? DvBool.NA);
return CreateConvertingArrayGetterDelegate<string, ReadOnlyMemory<char>>(index, x => x != null ? x.AsMemory() : ReadOnlyMemory<char>.Empty);
}

// T[] -> VBuffer<T>
Expand All @@ -193,7 +143,7 @@ private Delegate CreateGetter(int index)
else if (colType.IsVector)
{
// VBuffer<T> -> VBuffer<T>
// REVIEW: Do we care about accomodating VBuffer<string> -> VBuffer<DvText>?
// REVIEW: Do we care about accommodating VBuffer<string> -> VBuffer<ReadOnlyMemory<char>>?
Ch.Assert(outputType.IsGenericType);
Ch.Assert(outputType.GetGenericTypeDefinition() == typeof(VBuffer<>));
Ch.Assert(outputType.GetGenericArguments()[0] == colType.ItemType.RawType);
Expand All @@ -204,70 +154,11 @@ private Delegate CreateGetter(int index)
{
if (outputType == typeof(string))
{
// String -> DvText
// String -> ReadOnlyMemory<char>
Ch.Assert(colType.IsText);
return CreateConvertingGetterDelegate<String, DvText>(index, x => x == null ? DvText.NA : new DvText(x));
}
else if (outputType == typeof(bool))
{
// Bool -> DvBool
Ch.Assert(colType.IsBool);
return CreateConvertingGetterDelegate<bool, DvBool>(index, x => x);
}
else if (outputType == typeof(bool?))
{
// Bool? -> DvBool
Ch.Assert(colType.IsBool);
return CreateConvertingGetterDelegate<bool?, DvBool>(index, x => x ?? DvBool.NA);
}
else if (outputType == typeof(int))
{
// int -> DvInt4
Ch.Assert(colType == NumberType.I4);
return CreateConvertingGetterDelegate<int, DvInt4>(index, x => x);
}
else if (outputType == typeof(int?))
{
// int? -> DvInt4
Ch.Assert(colType == NumberType.I4);
return CreateConvertingGetterDelegate<int?, DvInt4>(index, x => x ?? DvInt4.NA);
}
else if (outputType == typeof(short))
{
// short -> DvInt2
Ch.Assert(colType == NumberType.I2);
return CreateConvertingGetterDelegate<short, DvInt2>(index, x => x);
}
else if (outputType == typeof(short?))
{
// short? -> DvInt2
Ch.Assert(colType == NumberType.I2);
return CreateConvertingGetterDelegate<short?, DvInt2>(index, x => x ?? DvInt2.NA);
}
else if (outputType == typeof(long))
{
// long -> DvInt8
Ch.Assert(colType == NumberType.I8);
return CreateConvertingGetterDelegate<long, DvInt8>(index, x => x);
}
else if (outputType == typeof(long?))
{
// long? -> DvInt8
Ch.Assert(colType == NumberType.I8);
return CreateConvertingGetterDelegate<long?, DvInt8>(index, x => x ?? DvInt8.NA);
}
else if (outputType == typeof(sbyte))
{
// sbyte -> DvInt1
Ch.Assert(colType == NumberType.I1);
return CreateConvertingGetterDelegate<sbyte, DvInt1>(index, x => x);
}
else if (outputType == typeof(sbyte?))
{
// sbyte? -> DvInt1
Ch.Assert(colType == NumberType.I1);
return CreateConvertingGetterDelegate<sbyte?, DvInt1>(index, x => x ?? DvInt1.NA);
return CreateConvertingGetterDelegate<String, ReadOnlyMemory<char>>(index, x => x != null ? x.AsMemory() : ReadOnlyMemory<char>.Empty);
}

// T -> T
if (outputType.IsGenericType && outputType.GetGenericTypeDefinition() == typeof(Nullable<>))
Ch.Assert(colType.RawType == Nullable.GetUnderlyingType(outputType));
Expand Down Expand Up @@ -805,12 +696,12 @@ public override ValueGetter<TDst> GetGetter<TDst>()
var itemType = typeT.GetElementType();
var dstItemType = typeof(TDst).GetGenericArguments()[0];

// String[] -> VBuffer<DvText>
// String[] -> VBuffer<ReadOnlyMemory<char>>
if (itemType == typeof(string))
{
Contracts.Check(dstItemType == typeof(DvText));
Contracts.Check(dstItemType == typeof(ReadOnlyMemory<char>));

ValueGetter<VBuffer<DvText>> method = GetStringArray;
ValueGetter<VBuffer<ReadOnlyMemory<char>>> method = GetStringArray;
return method as ValueGetter<TDst>;
}

Expand All @@ -825,7 +716,7 @@ public override ValueGetter<TDst> GetGetter<TDst>()
if (MetadataType.IsVector)
{
// VBuffer<T> -> VBuffer<T>
// REVIEW: Do we care about accomodating VBuffer<string> -> VBuffer<DvText>?
// REVIEW: Do we care about accommodating VBuffer<string> -> VBuffer<ReadOnlyMemory<char>>?

Contracts.Assert(typeT.IsGenericType);
Contracts.Check(typeof(TDst).IsGenericType);
Expand All @@ -845,9 +736,9 @@ public override ValueGetter<TDst> GetGetter<TDst>()
{
if (typeT == typeof(string))
{
// String -> DvText
// String -> ReadOnlyMemory<char>
Contracts.Assert(MetadataType.IsText);
ValueGetter<DvText> m = GetString;
ValueGetter<ReadOnlyMemory<char>> m = GetString;
return m as ValueGetter<TDst>;
}
// T -> T
Expand All @@ -861,14 +752,14 @@ public class TElement
{
}

private void GetStringArray(ref VBuffer<DvText> dst)
private void GetStringArray(ref VBuffer<ReadOnlyMemory<char>> dst)
{
var value = (string[])(object)Value;
var n = Utils.Size(value);
dst = new VBuffer<DvText>(n, Utils.Size(dst.Values) < n ? new DvText[n] : dst.Values, dst.Indices);
dst = new VBuffer<ReadOnlyMemory<char>>(n, Utils.Size(dst.Values) < n ? new ReadOnlyMemory<char>[n] : dst.Values, dst.Indices);

for (int i = 0; i < n; i++)
dst.Values[i] = new DvText(value[i]);
dst.Values[i] = value[i].AsMemory();

}

Expand All @@ -890,9 +781,9 @@ private ValueGetter<VBuffer<TDst>> GetVBufferGetter<TDst>()
return (ref VBuffer<TDst> dst) => castValue.CopyTo(ref dst);
}

private void GetString(ref DvText dst)
private void GetString(ref ReadOnlyMemory<char> dst)
{
dst = new DvText((string)(object)Value);
dst = ((string)(object)Value).AsMemory();
}

private void GetDirectValue<TDst>(ref TDst dst)
Expand Down
Loading