Skip to content

Commit

Permalink
added in standard conversions from types to ReadOnlyMemory<char> (#5106)
Browse files Browse the repository at this point in the history
* added in standard conversions from types to ReadOnlyMemory<char>

* fixed issues with differences in tostring of .netcore 3

* removing RunSpecificTest test attribute

* added comments into documentation about type changes
  • Loading branch information
michaelgsharp authored May 28, 2020
1 parent 65f6c5f commit 8b54a7b
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 6 deletions.
20 changes: 14 additions & 6 deletions docs/code/IDataViewTypeSystem.md
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ true/false values. The `BooleanDataViewType` class derives from

The default value of `BL` is `false`, and it has no `NA` value.

There is a standard conversion from `TX` to `BL`. There are standard
There are standard conversions from `TX` to `BL`, and from `BL` to `TX`. There are standard
conversions from `BL` to all signed integer and floating point numeric types,
with `false` mapping to zero and `true` mapping to one.

Expand Down Expand Up @@ -332,7 +332,8 @@ values being the canonical `NA` values.

There are standard conversions from each floating-point type to the other
floating-point type. There are also standard conversions from text to each
floating-point type and from each integer type to each floating-point type.
floating-point type, from each floating-point type to text, and from each
integer type to each floating-point type.

### Signed Integer Types

Expand All @@ -342,8 +343,8 @@ default value of each of these is zero.

There are standard conversions from each signed integer type to every other
signed integer type. There are also standard conversions from text to each
signed integer type and from each signed integer type to each floating-point
type.
signed integer type, from each signed integer type to text, and from each
signed integer type to each floating-point type.

Note that we have not defined standard conversions from floating-point types
to signed integer types.
Expand All @@ -357,8 +358,8 @@ have an `NA` value.

There are standard conversions from each unsigned integer type to every other
unsigned integer type. There are also standard conversions from text to each
unsigned integer type and from each unsigned integer type to each floating-
point type.
unsigned integer type, from each unsigned integer type to text, and from each
unsigned integer type to each floating-point type.

Note that we have not defined standard conversions from floating-point types
to unsigned integer types, or between signed integer types and unsigned
Expand Down Expand Up @@ -541,6 +542,13 @@ case, it is simple to map implicit items (suppressed due to sparsity) to zero.
In the former case, these items are first mapped to the empty text value. To
get the same result, we need empty text to map to zero.

### To Text

There are standard conversions to `TX` from the standard primitive types:
`R4`, `R8`, `I1`, `I2`, `I4`, `I8`, `U1`, `U2`, `U4`, `U8`, `BL`, `TS`, `DT`, and `DZ`.
`R4` uses the G7 format and `R8` uses the G17 format. `BL` converts to "True" or "False".
`TS` uses the "c" format specifier (`{0:c}`). `DT` and `DZ` use the "o" format specifier (`{0:o}`).

### Floating Point

There are standard conversions from `R4` to `R8` and from `R8` to `R4`. These
Expand Down
33 changes: 33 additions & 0 deletions src/Microsoft.ML.Data/Data/Conversion.cs
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ private Conversions(DoubleParser.OptionFlags doubleParserOptionFlags = DoublePar
AddStd<I1, R8>(Convert);
AddAux<I1, SB>(Convert);
AddStd<I1, BL>(Convert);
AddStd<I1, TX>(Convert);

AddStd<I2, I1>(Convert);
AddStd<I2, I2>(Convert);
Expand All @@ -132,6 +133,7 @@ private Conversions(DoubleParser.OptionFlags doubleParserOptionFlags = DoublePar
AddStd<I2, R8>(Convert);
AddAux<I2, SB>(Convert);
AddStd<I2, BL>(Convert);
AddStd<I2, TX>(Convert);

AddStd<I4, I1>(Convert);
AddStd<I4, I2>(Convert);
Expand All @@ -141,6 +143,7 @@ private Conversions(DoubleParser.OptionFlags doubleParserOptionFlags = DoublePar
AddStd<I4, R8>(Convert);
AddAux<I4, SB>(Convert);
AddStd<I4, BL>(Convert);
AddStd<I4, TX>(Convert);

AddStd<I8, I1>(Convert);
AddStd<I8, I2>(Convert);
Expand All @@ -150,6 +153,7 @@ private Conversions(DoubleParser.OptionFlags doubleParserOptionFlags = DoublePar
AddStd<I8, R8>(Convert);
AddAux<I8, SB>(Convert);
AddStd<I8, BL>(Convert);
AddStd<I8, TX>(Convert);

AddStd<U1, U1>(Convert);
AddStd<U1, U2>(Convert);
Expand All @@ -160,6 +164,7 @@ private Conversions(DoubleParser.OptionFlags doubleParserOptionFlags = DoublePar
AddStd<U1, R8>(Convert);
AddAux<U1, SB>(Convert);
AddStd<U1, BL>(Convert);
AddStd<U1, TX>(Convert);

AddStd<U2, U1>(Convert);
AddStd<U2, U2>(Convert);
Expand All @@ -170,6 +175,7 @@ private Conversions(DoubleParser.OptionFlags doubleParserOptionFlags = DoublePar
AddStd<U2, R8>(Convert);
AddAux<U2, SB>(Convert);
AddStd<U2, BL>(Convert);
AddStd<U2, TX>(Convert);

AddStd<U4, U1>(Convert);
AddStd<U4, U2>(Convert);
Expand All @@ -180,6 +186,7 @@ private Conversions(DoubleParser.OptionFlags doubleParserOptionFlags = DoublePar
AddStd<U4, R8>(Convert);
AddAux<U4, SB>(Convert);
AddStd<U4, BL>(Convert);
AddStd<U4, TX>(Convert);

AddStd<U8, U1>(Convert);
AddStd<U8, U2>(Convert);
Expand All @@ -190,23 +197,27 @@ private Conversions(DoubleParser.OptionFlags doubleParserOptionFlags = DoublePar
AddStd<U8, R8>(Convert);
AddAux<U8, SB>(Convert);
AddStd<U8, BL>(Convert);
AddStd<U8, TX>(Convert);

AddStd<UG, U1>(Convert);
AddStd<UG, U2>(Convert);
AddStd<UG, U4>(Convert);
AddStd<UG, U8>(Convert);
// REVIEW: Conversion from UG to R4/R8, should we?
AddAux<UG, SB>(Convert);
AddStd<UG, TX>(Convert);

AddStd<R4, R4>(Convert);
AddStd<R4, BL>(Convert);
AddStd<R4, R8>(Convert);
AddAux<R4, SB>(Convert);
AddStd<R4, TX>(Convert);

AddStd<R8, R4>(Convert);
AddStd<R8, R8>(Convert);
AddStd<R8, BL>(Convert);
AddAux<R8, SB>(Convert);
AddStd<R8, TX>(Convert);

AddStd<TX, I1>(Convert);
AddStd<TX, U1>(Convert);
Expand Down Expand Up @@ -234,22 +245,26 @@ private Conversions(DoubleParser.OptionFlags doubleParserOptionFlags = DoublePar
AddStd<BL, R8>(Convert);
AddStd<BL, BL>(Convert);
AddAux<BL, SB>(Convert);
AddStd<BL, TX>(Convert);

AddStd<TS, I8>(Convert);
AddStd<TS, R4>(Convert);
AddStd<TS, R8>(Convert);
AddAux<TS, SB>(Convert);
AddStd<TS, TX>(Convert);

AddStd<DT, I8>(Convert);
AddStd<DT, R4>(Convert);
AddStd<DT, R8>(Convert);
AddStd<DT, DT>(Convert);
AddAux<DT, SB>(Convert);
AddStd<DT, TX>(Convert);

AddStd<DZ, I8>(Convert);
AddStd<DZ, R4>(Convert);
AddStd<DZ, R8>(Convert);
AddAux<DZ, SB>(Convert);
AddStd<DZ, TX>(Convert);

AddIsNA<R4>(IsNA);
AddIsNA<R8>(IsNA);
Expand Down Expand Up @@ -921,6 +936,24 @@ public void Convert(in BL src, ref SB dst)
public void Convert(in DZ src, ref SB dst) { ClearDst(ref dst); dst.AppendFormat("{0:o}", src); }
#endregion ToStringBuilder

#region ToTX
// Standard conversions to text (TX). Every overload formats src as a string and
// stores the result in dst via AsMemory(); dst is always overwritten.

// Integer types: formatted with the parameterless ToString().
public void Convert(in I1 src, ref TX dst) => dst = src.ToString().AsMemory();
public void Convert(in I2 src, ref TX dst) => dst = src.ToString().AsMemory();
public void Convert(in I4 src, ref TX dst) => dst = src.ToString().AsMemory();
public void Convert(in I8 src, ref TX dst) => dst = src.ToString().AsMemory();
public void Convert(in U1 src, ref TX dst) => dst = src.ToString().AsMemory();
public void Convert(in U2 src, ref TX dst) => dst = src.ToString().AsMemory();
public void Convert(in U4 src, ref TX dst) => dst = src.ToString().AsMemory();
public void Convert(in U8 src, ref TX dst) => dst = src.ToString().AsMemory();

// UG: "0x" followed by the High and then the Low part, each formatted with "x16".
public void Convert(in UG src, ref TX dst) => dst = string.Format("0x{0:x16}{1:x16}", src.High, src.Low).AsMemory();

// Floating point: R4 uses "G7" and R8 uses "G17", both with the invariant culture
// so the text does not depend on the machine's regional settings.
public void Convert(in R4 src, ref TX dst) => dst = src.ToString("G7", CultureInfo.InvariantCulture).AsMemory();
public void Convert(in R8 src, ref TX dst) => dst = src.ToString("G17", CultureInfo.InvariantCulture).AsMemory();

// BL: formatted with the parameterless ToString().
public void Convert(in BL src, ref TX dst) => dst = src.ToString().AsMemory();

// TS uses the "c" format specifier.
public void Convert(in TS src, ref TX dst) => dst = string.Format("{0:c}", src).AsMemory();

// DT and DZ use the "o" format specifier, matching the StringBuilder conversions.
// The formatted text must be assigned to dst; without the assignment the expression
// body only evaluates the string and the caller's destination is left unchanged.
public void Convert(in DT src, ref TX dst) => dst = string.Format("{0:o}", src).AsMemory();
public void Convert(in DZ src, ref TX dst) => dst = string.Format("{0:o}", src).AsMemory();
#endregion ToTX

#region ToBL
public void Convert(in R8 src, ref BL dst) => dst = System.Convert.ToBoolean(src);
public void Convert(in R4 src, ref BL dst) => dst = System.Convert.ToBoolean(src);
Expand Down
20 changes: 20 additions & 0 deletions test/Microsoft.ML.Tests/Transformers/ConvertTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,26 @@ public void TestConvertWorkout()
var expectedConvertedValues = ML.Data.LoadFromEnumerable(allTypesDataConverted);

CheckSameValues(expectedConvertedValues, actualConvertedValues);

var allInputTypesData = new[] { new { A = (sbyte)sbyte.MinValue, B = (byte)byte.MinValue, C = double.MaxValue, D = float.MinValue, E = "already a string", F = false } };
var allInputTypesDataView = ML.Data.LoadFromEnumerable(allInputTypesData);
var allInputTypesDataPipe = ML.Transforms.Conversion.ConvertType(columns: new[] {new TypeConvertingEstimator.ColumnOptions("A1", DataKind.String, "A"),
new TypeConvertingEstimator.ColumnOptions("B1", DataKind.String, "B"),
new TypeConvertingEstimator.ColumnOptions("C1", DataKind.String, "C"),
new TypeConvertingEstimator.ColumnOptions("D1", DataKind.String, "D"),
new TypeConvertingEstimator.ColumnOptions("E1", DataKind.String, "E"),
new TypeConvertingEstimator.ColumnOptions("F1", DataKind.String, "F"),
});

var convertedValues = allInputTypesDataPipe.Fit(allInputTypesDataView).Transform(allInputTypesDataView);

var expectedValuesData = new[] { new { A = (sbyte)sbyte.MinValue, B = (byte)byte.MinValue, C = double.MaxValue, D = float.MinValue, E = "already a string", F = false,
A1 = "-128", B1 = "0", C1 = "1.7976931348623157E+308", D1 = "-3.402823E+38", E1 = "already a string", F1 = "False" } };
var expectedValuesDataView = ML.Data.LoadFromEnumerable(expectedValuesData);

CheckSameValues(expectedValuesDataView, convertedValues);
TestEstimatorCore(allInputTypesDataPipe, allInputTypesDataView);

Done();
}

Expand Down

0 comments on commit 8b54a7b

Please sign in to comment.