Closed
Description
I'm trying out the sample shown here. However, whenever I try to train the model, I get the error: "The size of input lines is not consistent". This happens with the exact files that are specified in the tutorial, so I'm not sure where I'm going wrong. Any ideas?
#r "netstandard"
#load @"C:\Users\Isaac\Source\Repos\scratchpad\.paket\load\netstandard2.0\ML\ml.group.fsx"
open Microsoft.ML
open Microsoft.ML.Runtime.Api
open Microsoft.ML.Transforms
open Microsoft.ML.Trainers
// Relative path of the data file handed to the TextLoader when the pipeline is built.
let dataPath = @"data\imdb_labelled.txt"
// Relative path of a second data file. It is not referenced in the visible part of
// this script — presumably intended for evaluating the trained model; confirm.
let testDataPath = @"data\yelp_labelled.txt"
/// One input row of the sentiment data file, as described to the TextLoader.
/// Column 0 is mapped to the review text and column 1 to the label.
// NOTE(review): the legacy TextLoader may only map public fields rather than
// record properties. If loading still fails after the type fix below, confirm
// whether a class with `[<DefaultValue>] val mutable` fields is required.
type SentimentData =
    {
        /// Raw sentence text, read from column 0.
        [<Column(ordinal = "0")>]
        SentimentText : string
        /// Label read from column 1 and exposed under the column name "Label".
        /// Declared as float32 (System.Single): F# `float` is a 64-bit
        /// System.Double, whereas C# `float` (used by the C# sample) is
        /// single precision.
        [<Column(ordinal = "1", name = "Label")>]
        Sentiment : float32
    }
/// Output row produced when the trained model scores an input.
/// CLIMutable makes the compiler emit a parameterless constructor and
/// property setters for this record.
[<CLIMutable>]
type SentimentPrediction =
    {
        /// Bound to the "PredictedLabel" column.
        [<ColumnName("PredictedLabel")>]
        Sentiment : bool
    }
// Assemble the learning pipeline step by step; the steps are added in the
// order loader -> featurizer -> learner.
let pipeline = LearningPipeline()
// Read rows typed as SentimentData from dataPath: tab-separated, no header row.
pipeline.Add(TextLoader<SentimentData>(dataPath, useHeader = false, separator = "tab"))
// Text featurization step given the names "Features" and "SentimentText" —
// presumably the output column and the input column respectively; confirm
// against the TextFeaturizer API docs.
pipeline.Add(TextFeaturizer("Features", "SentimentText"))
// FastTree binary classifier configured with NumLeaves = 5, NumTrees = 5 and
// MinDocumentsInLeafs = 2.
pipeline.Add(FastTreeBinaryClassifier(NumLeaves = 5, NumTrees = 5, MinDocumentsInLeafs = 2))
// This is the call that fails: training raises
// "The size of input lines is not consistent".
let model = pipeline.Train<SentimentData, SentimentPrediction>()