Skip to content

Commit a9a813c

Browse files
committed
proper use of vectorizer and working example:
1 parent 6049fe6 commit a9a813c

File tree

1 file changed

+10
-3
lines changed

1 file changed

+10
-3
lines changed

examples/LanguageDetection.php

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,25 +6,32 @@
66
use Phpml\Dataset\ArrayDataset;
77
use Phpml\FeatureExtraction\TokenCountVectorizer;
88
use Phpml\Tokenization\WordTokenizer;
9-
use Phpml\CrossValidation\RandomSplit;
9+
use Phpml\CrossValidation\StratifiedRandomSplit;
10+
use Phpml\FeatureExtraction\TfIdfTransformer;
1011
use Phpml\Metric\Accuracy;
1112
use Phpml\Classification\SVC;
1213
use Phpml\SupportVectorMachine\Kernel;
1314

1415
$dataset = new CsvDataset('data/languages.csv', 1);
1516
$vectorizer = new TokenCountVectorizer(new WordTokenizer());
17+
$tfIdfTransformer = new TfIdfTransformer();
1618

1719
$samples = [];
1820
foreach ($dataset->getSamples() as $sample) {
1921
$samples[] = $sample[0];
2022
}
2123

24+
$vectorizer->fit($samples);
2225
$vectorizer->transform($samples);
26+
27+
$tfIdfTransformer->fit($samples);
28+
$tfIdfTransformer->transform($samples);
29+
2330
$dataset = new ArrayDataset($samples, $dataset->getTargets());
2431

25-
$randomSplit = new RandomSplit($dataset, 0.25);
32+
$randomSplit = new StratifiedRandomSplit($dataset, 0.1);
2633

27-
$classifier = new SVC(Kernel::RBF, 100);
34+
$classifier = new SVC(Kernel::RBF, 10000);
2835
$classifier->train($randomSplit->getTrainSamples(), $randomSplit->getTrainLabels());
2936

3037
$predictedLabels = $classifier->predict($randomSplit->getTestSamples());

0 commit comments

Comments
 (0)