Skip to content

Commit d0664c1

Browse files
authored
Documentation fixes for the issues in #664 (#665)
* Documentation fixes described in issue #664 * this P: notation shouldn't matter, but it does. * regenerating the CSharpApi * regenerating the entry points list and manifest
1 parent 1b2f181 commit d0664c1

File tree

9 files changed

+30
-26
lines changed

9 files changed

+30
-26
lines changed

src/Microsoft.ML.Data/Transforms/doc.xml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,10 @@
99
This transform removes the entire row if any of the input columns have a missing value in that row.
1010
This preprocessing is required for many ML algorithms that cannot work with missing values.
1111
Useful if any missing entry invalidates the entire row.
12-
If the <see cref="Microsoft.ML.Runtime.Data.NAFilter.Defaults.Complement"/> is set to true, this transform would do the exact opposite,
12+
If the <see cref="P:Microsoft.ML.Transforms.MissingValuesRowDropper.Complement"/> is set to true, this transform would do the exact opposite,
1313
it will keep only the rows that have missing values.
1414
</remarks>
15-
<seealso cref="Microsoft.ML.Runtime.Data.MetadataUtils.Kinds.HasMissingValues"></seealso>
15+
<seealso cref="T:Microsoft.ML.Runtime.Data.MetadataUtils.Kinds.HasMissingValues"></seealso>
1616
</member>
1717
<example name="NAFilter">
1818
<example>
@@ -44,7 +44,7 @@
4444
<code language="csharp">
4545
pipeline.Add(new TextToKeyConverter((&quot;Column&quot;, &quot;OutColumn&quot;))
4646
{
47-
Sort = TermTransformSortOrder.Occurrence
47+
Sort = TermTransformSortOrder.Occurrence
4848
});
4949
</code>
5050
</example>
@@ -82,15 +82,15 @@
8282
It can be changed to true for known length vectors, but it results in an error if changed to false for variable length vectors.
8383
</para>
8484
</remarks>
85-
<seealso cref=" Microsoft.ML.Runtime.Data.MetadataUtils.Kinds.HasMissingValues"/>
85+
<seealso cref="T:Microsoft.ML.Runtime.Data.MetadataUtils.Kinds.HasMissingValues"/>
8686
<seealso cref="T:Microsoft.ML.Data.DataKind"/>
8787
</member>
8888
<example name="NAHandle">
8989
<example>
9090
<code language="csharp">
9191
pipeline.Add(new MissingValueHandler(&quot;FeatureCol&quot;, &quot;CleanFeatureCol&quot;)
9292
{
93-
ReplaceWith = NAHandleTransformReplacementKind.Mean
93+
ReplaceWith = NAHandleTransformReplacementKind.Mean
9494
});
9595
</code>
9696
</example>

src/Microsoft.ML.FastTree/doc.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
<item><description>In case of a ranking problem, the instances are ordered by the output value of the ensemble.</description></item>
3030
</list>
3131
<para>For more information see:</para>
32-
<list>
32+
<list type="bullet">
3333
<item><description><a href='https://en.wikipedia.org/wiki/Gradient_boosting#Gradient_tree_boosting'>Wikipedia: Gradient boosting (Gradient tree boosting).</a></description></item>
3434
<item><description><a href='http://projecteuclid.org/DPubS?service=UI&amp;version=1.0&amp;verb=Display&amp;handle=euclid.aos/1013203451'>Greedy function approximation: A gradient boosting machine.</a></description></item>
3535
</list>

src/Microsoft.ML.HalLearners/SymSgdClassificationTrainer.cs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ namespace Microsoft.ML.Runtime.SymSgd
3232
{
3333
using TPredictor = IPredictorWithFeatureWeights<Float>;
3434

35+
/// <include file='doc.xml' path='doc/members/member[@name="SymSGD"]/*' />
3536
public sealed class SymSgdClassificationTrainer :
3637
TrainerBase<TPredictor>,
3738
ITrainer<TPredictor>
@@ -173,7 +174,11 @@ private TPredictor CreatePredictor(VBuffer<Float> weights, Float bias)
173174
return new ParameterMixingCalibratedPredictor(Host, predictor, new PlattCalibrator(Host, -1, 0));
174175
}
175176

176-
[TlcModule.EntryPoint(Name = "Trainers.SymSgdBinaryClassifier", Desc = "Train a symbolic SGD.", UserName = SymSgdClassificationTrainer.UserNameValue, ShortName = SymSgdClassificationTrainer.ShortName)]
177+
[TlcModule.EntryPoint(Name = "Trainers.SymSgdBinaryClassifier",
178+
Desc = "Train a symbolic SGD.",
179+
UserName = SymSgdClassificationTrainer.UserNameValue,
180+
ShortName = SymSgdClassificationTrainer.ShortName,
181+
XmlInclude = new[] { @"<include file='../Microsoft.ML.HalLearners/doc.xml' path='doc/members/member[@name=""SymSGD""]/*' />" })]
177182
public static CommonOutputs.BinaryClassificationOutput TrainSymSgd(IHostEnvironment env, Arguments input)
178183
{
179184
Contracts.CheckValue(env, nameof(env));

src/Microsoft.ML.Transforms/HashJoinTransform.cs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -152,8 +152,7 @@ private static KeyType GetItemType(int hashBits)
152152

153153
internal const string RegistrationName = "HashJoin";
154154

155-
internal const string Summary = "Converts column values into hashes. This transform accepts both numeric and text inputs, both single and vector-valued columns. "
156-
+ "This is a part of the Dracula transform.";
155+
internal const string Summary = "Converts column values into hashes. This transform accepts both numeric and text inputs, both single and vector-valued columns. ";
157156

158157
internal const string UserName = "Hash Join Transform";
159158

src/Microsoft.ML.Transforms/Text/doc.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@
7474
The n-grams are represented as count vectors, with vector slots corresponding to n-grams.
7575
Embedding ngrams in a vector space allows their contents to be compared in an efficient manner.
7676
The slot values in the vector can be weighted by the following factors:
77-
<list>
77+
<list type="bullet">
7878
<item>
7979
<term>term frequency</term>
8080
<description> the number of occurrences of the slot in the text</description>
@@ -169,7 +169,7 @@
169169
</para>
170170
<para>
171171
For more details please see original LightLDA paper, and its open source implementation.
172-
<list>
172+
<list type="bullet">
173173
<item><description><a href="http://arxiv.org/abs/1412.1576"> LightLDA: Big Topic Models on Modest Computer Clusters</a></description></item>
174174
<item><description><a href="https://github.com/Microsoft/LightLDA">LightLDA</a></description></item>
175175
</list>

src/Microsoft.ML.Transforms/doc.xml

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,11 @@
2929
Converts the categorical value into an indicator array by building a dictionary of categories based on the data and using the id in the dictionary as the index in the array
3030
</summary>
3131
<remarks>
32-
<para>The CategoricalOneHotVectorizer transform passes through a data set, operating on text columns, to
32+
<para>
33+
The CategoricalOneHotVectorizer transform passes through a data set, operating on text columns, to
3334
build a dictionary of categories.
34-
For each row, the entire text string appearing in the input column is defined as a category.
35-
The output of this transform is an indicator vector.</para>
35+
For each row, the entire text string appearing in the input column is defined as a category.</para>
36+
<para>The output of this transform is an indicator vector.
3637
Each slot in this vector corresponds to a category in the dictionary, so its length is the size of the built dictionary.</para>
3738
<para>The CategoricalOneHotVectorizer can be applied to one or more columns, in which case it builds and uses a separate dictionary
3839
for each column that it is applied to.</para>
@@ -171,7 +172,7 @@
171172
This transform can transform either scalars or vectors (both fixed and variable size),
172173
creating output columns that indicate, through the true/false booleans whether the row has a missing value.
173174
</summary>
174-
<seealso cref=" Microsoft.ML.Runtime.Data.MetadataUtils.Kinds.HasMissingValues"></seealso>
175+
<seealso cref="T:Microsoft.ML.Runtime.Data.MetadataUtils.Kinds.HasMissingValues"></seealso>
175176
</member>
176177
<example name="NAIndicator">
177178
<example>
@@ -192,14 +193,14 @@
192193
with either the default value, user input, or imputed values (min/max/mean are currently supported).
193194
Imputation modes are supported for vectors both by slot and across all slots.
194195
</remarks>
195-
<seealso cref=" Microsoft.ML.Runtime.Data.MetadataUtils.Kinds.HasMissingValues"></seealso>
196+
<seealso cref="T:Microsoft.ML.Runtime.Data.MetadataUtils.Kinds.HasMissingValues"></seealso>
196197
</member>
197198
<example name="NAReplace">
198199
<example>
199200
<code language="csharp">
200201
pipeline.Add(new MissingValueSubstitutor(&quot;FeatureCol&quot;)
201202
{
202-
ReplacementKind = NAReplaceTransformReplacementKind.Mean
203+
ReplacementKind = NAReplaceTransformReplacementKind.Mean
203204
});
204205
</code>
205206
</example>
@@ -221,7 +222,7 @@
221222
<code language="csharp">
222223
pipeline.Add(new LpNormalizer(&quot;FeatureCol&quot;)
223224
{
224-
NormKind = LpNormNormalizerTransformNormalizerKind.L1Norm
225+
NormKind = LpNormNormalizerTransformNormalizerKind.L1Norm
225226
});
226227
</code>
227228
</example>
@@ -235,14 +236,15 @@
235236
</summary>
236237
<remarks>
237238
Scaling inputs to unit norms is a common operation for text classification or clustering.
238-
For more information see: <a href="http://www.cs.stanford.edu/~acoates/papers/coatesleeng_aistats_2011.pdf"></a>
239+
For more information see:
240+
<a href="http://www.cs.stanford.edu/~acoates/papers/coatesleeng_aistats_2011.pdf">An Analysis of Single-Layer Networks in Unsupervised Feature Learning</a>
239241
</remarks>
240242
<seealso cref="T:Microsoft.ML.Transforms.LpNormalizer"></seealso>
241243
<example>
242244
<code language="csharp">
243245
pipeline.Add(new GlobalContrastNormalizer(&quot;FeatureCol&quot;)
244246
{
245-
SubMean= false
247+
SubMean= false
246248
});
247249
</code>
248250
</example>
@@ -334,7 +336,7 @@
334336
<code language="csharp">
335337
pipeline.Add(new CombinerByContiguousGroupId
336338
{
337-
GroupKey = new []{&quot;Key1&quot;, &quot;Key2&quot; }
339+
GroupKey = new []{&quot;Key1&quot;, &quot;Key2&quot; }
338340
});
339341
</code>
340342
</example>

src/Microsoft.ML/CSharpApi.cs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9776,9 +9776,7 @@ public StochasticGradientDescentBinaryClassifierPipelineStep(Output output)
97769776
namespace Trainers
97779777
{
97789778

9779-
/// <summary>
9780-
/// Train a symbolic SGD.
9781-
/// </summary>
9779+
/// <include file='../Microsoft.ML.HalLearners/doc.xml' path='doc/members/member[@name="SymSGD"]/*' />
97829780
public sealed partial class SymSgdBinaryClassifier : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem
97839781
{
97849782

test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ Transforms.FeatureCombiner Combines all the features into one feature column. Mi
8888
Transforms.FeatureSelectorByCount Selects the slots for which the count of non-default values is greater than or equal to a threshold. Microsoft.ML.Runtime.EntryPoints.SelectFeatures CountSelect Microsoft.ML.Runtime.Data.CountFeatureSelectionTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput
8989
Transforms.FeatureSelectorByMutualInformation Selects the top k slots across all specified columns ordered by their mutual information with the label column. Microsoft.ML.Runtime.EntryPoints.SelectFeatures MutualInformationSelect Microsoft.ML.Runtime.Data.MutualInformationFeatureSelectionTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput
9090
Transforms.GlobalContrastNormalizer Performs a global contrast normalization on input values: Y = (s * X - M) / D, where s is a scale, M is mean and D is either L2 norm or standard deviation. Microsoft.ML.Runtime.Data.LpNormalization GcNormalize Microsoft.ML.Runtime.Data.LpNormNormalizerTransform+GcnArguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput
91-
Transforms.HashConverter Converts column values into hashes. This transform accepts both numeric and text inputs, both single and vector-valued columns. This is a part of the Dracula transform. Microsoft.ML.Runtime.Data.HashJoin Apply Microsoft.ML.Runtime.Data.HashJoinTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput
91+
Transforms.HashConverter Converts column values into hashes. This transform accepts both numeric and text inputs, both single and vector-valued columns. Microsoft.ML.Runtime.Data.HashJoin Apply Microsoft.ML.Runtime.Data.HashJoinTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput
9292
Transforms.ImageGrayscale Convert image into grayscale. Microsoft.ML.Runtime.ImageAnalytics.EntryPoints.ImageAnalytics ImageGrayscale Microsoft.ML.Runtime.ImageAnalytics.ImageGrayscaleTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput
9393
Transforms.ImageLoader Load images from files. Microsoft.ML.Runtime.ImageAnalytics.EntryPoints.ImageAnalytics ImageLoader Microsoft.ML.Runtime.ImageAnalytics.ImageLoaderTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput
9494
Transforms.ImagePixelExtractor Extract color plane(s) from an image. Options include scaling, offset and conversion to floating point. Microsoft.ML.Runtime.ImageAnalytics.EntryPoints.ImageAnalytics ImagePixelExtractor Microsoft.ML.Runtime.ImageAnalytics.ImagePixelExtractorTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput

test/BaselineOutput/Common/EntryPoints/core_manifest.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18062,7 +18062,7 @@
1806218062
},
1806318063
{
1806418064
"Name": "Transforms.HashConverter",
18065-
"Desc": "Converts column values into hashes. This transform accepts both numeric and text inputs, both single and vector-valued columns. This is a part of the Dracula transform.",
18065+
"Desc": "Converts column values into hashes. This transform accepts both numeric and text inputs, both single and vector-valued columns. ",
1806618066
"FriendlyName": "Hash Join Transform",
1806718067
"ShortName": "HashJoin",
1806818068
"Inputs": [

0 commit comments

Comments
 (0)