Commit 80b592b

chore: squash plugin-nlu's branch commits
1 parent c65fd7b commit 80b592b

Large commits have some content hidden by default, so some file names below are omitted.

46 files changed: +1018 −1877 lines
Lines changed: 48 additions & 0 deletions
@@ -0,0 +1,48 @@
+import { Command, flags } from '@oclif/command'
+import { track, getGlobalNodeModulesPath } from '../utils'
+import * as colors from 'colors'
+const path = require('path')
+
+export default class Run extends Command {
+  static description = 'Serve your bot in your localhost'
+
+  static examples = [
+    `$ botonic train
+TRAINING MODEL FOR {LANGUAGE}...
+`
+  ]
+
+  static flags = {
+    lang: flags.string()
+  }
+
+  static args = []
+
+  async run() {
+    const { args, flags } = this.parse(Run)
+
+    const botonicNLUPath: string = path.join(
+      process.cwd(),
+      'node_modules',
+      '@botonic',
+      'nlu'
+    )
+    try {
+      const { BotonicNLU, CONSTANTS } = await import(botonicNLUPath)
+      process.argv.push(CONSTANTS.LANG_FLAG)
+      if (flags.lang) {
+        process.argv.push(flags.lang)
+      }
+      track('Trained with Botonic train')
+      const botonicNLU = new BotonicNLU()
+      const nluPath = path.join(process.cwd(), 'src', CONSTANTS.NLU_DIRNAME)
+      await botonicNLU.train({ nluPath })
+    } catch (e) {
+      console.log(
+        `You don't have @botonic/nlu installed.\nPlease, install it by typing the following command:`
+          .red
+      )
+      console.log(` $ npm install @botonic/nlu`)
+    }
+  }
+}
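
Note how the new command hands the language over to @botonic/nlu: it appends CONSTANTS.LANG_FLAG and, if present, the flag's value to process.argv, and the package's parseLangFlag reads them back out. parseLangFlag itself is not part of this file; a minimal sketch of what such a parser could look like, assuming a comma-separated value:

// Hypothetical sketch; the real parseLangFlag lives in @botonic/nlu and is not shown here.
function parseLangFlag(argv: string[]): string[] | undefined {
  const i = argv.indexOf('--lang') // LANG_FLAG, as exported by the constants module below
  if (i === -1 || i + 1 >= argv.length) return undefined
  return argv[i + 1].split(',') // assumption: `--lang en,es` selects several languages
}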

packages/botonic-cli/src/utils.ts

Lines changed: 21 additions & 0 deletions
@@ -2,6 +2,7 @@ const fs = require('fs')
 const os = require('os')
 const path = require('path')
 const Analytics = require('analytics-node')
+import { exec } from 'child_process'
 
 export var analytics: any
 
@@ -62,3 +63,23 @@ export function botonicPostInstall() {
 export function sleep(ms: number) {
   return new Promise(resolve => setTimeout(resolve, ms))
 }
+
+async function sh(cmd) {
+  return new Promise(function(resolve, reject) {
+    exec(cmd, (err, stdout, stderr) => {
+      if (err) {
+        reject(err)
+      } else {
+        resolve({ stdout, stderr })
+      }
+    })
+  })
+}
+
+export async function getGlobalNodeModulesPath() {
+  const CROSS_PLATFORM_REGEX = /\r?\n|\r/g
+  return ((await sh('npm root -g')) as any).stdout.replace(
+    CROSS_PLATFORM_REGEX,
+    ''
+  )
+}
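
As a usage sketch, the new helper pairs naturally with path.join to locate a globally installed package; the wrapper below is illustrative and not part of the commit:

import * as path from 'path'
import { getGlobalNodeModulesPath } from './utils'

// `npm root -g` prints the global node_modules directory followed by a line
// break; getGlobalNodeModulesPath strips it on every platform.
async function locateGlobalPackage(pkg: string): Promise<string> {
  const globalModules = await getGlobalNodeModulesPath()
  return path.join(globalModules, pkg)
}

// e.g. locateGlobalPackage('@botonic/nlu')
//      => '/usr/local/lib/node_modules/@botonic/nlu' on a typical macOS/Linux setup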

packages/botonic-nlu/.babelrc

Lines changed: 11 additions & 0 deletions
@@ -0,0 +1,11 @@
+{
+  "presets": [
+    [
+      "@babel/preset-env",
+      {
+        "modules": "umd"
+      }
+    ]
+  ],
+  "plugins": ["@babel/plugin-transform-runtime"]
+}
File renamed without changes.

packages/botonic-nlu/package.json

Lines changed: 33 additions & 0 deletions
@@ -0,0 +1,33 @@
+{
+  "name": "@botonic/nlu",
+  "version": "0.1.0",
+  "main": "lib/index",
+  "scripts": {
+    "build": "rm -rf lib && babel src -d lib",
+    "test": "jest"
+  },
+  "jest": {
+    "testEnvironment": "node"
+  },
+  "dependencies": {
+    "@babel/runtime": "^7.5.5",
+    "@tensorflow/tfjs": "^1.2.7",
+    "@tensorflow/tfjs-node": "^1.2.7",
+    "axios": "^0.19.0",
+    "colors": "^1.3.3",
+    "compromise": "^11.13.2",
+    "compromise-plugin": "0.0.9",
+    "franc": "^4.1.0",
+    "fs": "0.0.1-security",
+    "inquirer": "^6.3.1",
+    "sqlite-async": "^1.0.11"
+  },
+  "devDependencies": {
+    "@babel/cli": "^7.5.5",
+    "@babel/core": "^7.5.5",
+    "@babel/plugin-transform-runtime": "^7.5.5",
+    "@babel/preset-env": "^7.5.5",
+    "@types/jest": "^24.0.17",
+    "jest": "^24.8.0"
+  }
+}
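
Because .babelrc above emits UMD modules and "main" points at lib/index, the output of npm run build should be loadable with a plain CommonJS require. A minimal smoke test, assuming the package has been built and installed (the exported names are taken from the CLI command earlier in this commit):

// Assumes `npm run build` has compiled src/ into lib/ (see the "build" script above).
const { BotonicNLU, CONSTANTS } = require('@botonic/nlu')
console.log(typeof BotonicNLU) // 'function'
console.log(CONSTANTS.NLU_DIRNAME) // 'nlu'
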
Lines changed: 184 additions & 0 deletions
@@ -0,0 +1,184 @@
+import path from 'path'
+import { readJSON, readDir } from './file-utils'
+import { detectLang, preprocessData } from './preprocessing'
+import { getEmbeddingMatrix } from './word-embeddings'
+import * as tf from '@tensorflow/tfjs-node'
+import { parseLangFlag, printPrettyConfig } from './utils'
+import {
+  UTTERANCES_DIRNAME,
+  MODELS_DIRNAME,
+  NLU_DATA_FILENAME,
+  MODEL_FILENAME
+} from './constants'
+import { loadDevData, saveDevData } from './file-utils'
+import { getPrediction, getIntent } from './prediction'
+import { getEntities } from './ner'
+
+// TODO: interactive command to try intents from terminal
+// import inquirer from 'inquirer'
+// import { interactiveMode } from './scripts/interactive-mode'
+// async function askForInteractiveMode() {
+//   const questions = [
+//     {
+//       type: 'confirm',
+//       name: 'affirmative',
+//       message: `Do you want to switch into interactive mode?`
+//     }
+//   ]
+//   return inquirer.prompt(questions)
+// }
+
+export class BotonicNLU {
+  constructor() {
+    this.languages = parseLangFlag(process.argv)
+    this.nluPath = ''
+    this.utterancesPath = ''
+    this.modelsPath = ''
+    this.devData = {}
+    this.models = {}
+  }
+
+  async train({ nluPath }) {
+    // TODO: Think about passing an arg for using models in memory
+    this.nluPath = nluPath
+    this.utterancesPath = path.join(nluPath, UTTERANCES_DIRNAME)
+    this.modelsPath = path.join(nluPath, MODELS_DIRNAME)
+    this.devData = loadDevData(this.nluPath, this.languages)
+    this.languages = Object.keys(this.devData)
+    for (let language of this.languages) {
+      let devData = this.devData[language]
+      let { devIntents, params, devEntities } = devData
+      params = { ...params, language } // TODO: Think better this reassignment
+      printPrettyConfig(params)
+      let start = new Date()
+      let {
+        tensorData,
+        tensorLabels,
+        vocabulary,
+        vocabularyLength
+      } = preprocessData(devIntents, params)
+      let embeddingMatrix = await getEmbeddingMatrix({
+        vocabulary,
+        vocabularyLength,
+        params
+      })
+      this.models[language] = embeddingLSTMModel({
+        params,
+        vocabularyLength,
+        embeddingMatrix: tf.tensor(embeddingMatrix),
+        outputDim: Object.keys(devIntents.intentsDict).length
+      })
+      this.models[language].summary()
+      this.models[language].compile({
+        optimizer: tf.train.adam(params.LEARNING_RATE),
+        loss: 'categoricalCrossentropy',
+        metrics: ['accuracy']
+      })
+      console.log('TRAINING...')
+
+      const history = await this.models[language].fit(
+        tensorData,
+        tensorLabels,
+        {
+          epochs: params.EPOCHS,
+          validationSplit: params.VALIDATION_SPLIT
+        }
+      )
+      let end = new Date() - start
+      console.log(`\nTOTAL TRAINING TIME: ${end}ms`)
+      let nluData = {
+        maxSeqLength: params.MAX_SEQ_LENGTH,
+        vocabulary,
+        intentsDict: devIntents.intentsDict,
+        language,
+        devEntities
+      }
+      await saveDevData({
+        modelsPath: this.modelsPath,
+        model: this.models[language],
+        language,
+        nluData
+      })
+    }
+  }
+
+  async loadModels({ modelsPath }) {
+    let models = {}
+    models.languages = readDir(modelsPath)
+    for (let language of models.languages) {
+      models[language] = {}
+      models[language].nluData = readJSON(
+        path.join(modelsPath, language, NLU_DATA_FILENAME)
+      )
+      models[language].model = await tf.loadLayersModel(
+        `file://${modelsPath}/${language}/${MODEL_FILENAME}`
+      )
+    }
+    return models
+  }
+  predict(models, input) {
+    let language = detectLang(input, models.languages)
+    let { model, nluData } = models[language]
+    let prediction = getPrediction(input, model, nluData)
+    let intent = getIntent(prediction, nluData.intentsDict, language)
+    let entities = getEntities(input, nluData.devEntities)
+    return { intent, entities }
+  }
+  // static async interactive({ modelsPath, languages }) {
+  //   let wantsInteractiveMode = await askForInteractiveMode()
+  //   if (wantsInteractiveMode.affirmative) {
+  //     let modelsLanguages =
+  //       parseLangFlag(process.argv) || languages || readDir(modelsPath)
+  //     let nlus = {}
+  //     for (let lang of modelsLanguages) {
+  //       nlus[`${lang}`] = {}
+  //       nlus[`${lang}`].nluData = readJSON(
+  //         path.join(modelsPath, lang, NLU_DATA_FILENAME)
+  //       )
+  //       nlus[`${lang}`].model = await tf.loadLayersModel(
+  //         `file://${modelsPath}/${lang}/${MODEL_FILENAME}`
+  //       )
+  //     }
+  //     interactiveMode(nlus)
+  //   }
+  // }
+}
+function embeddingLSTMModel({
+  vocabularyLength,
+  embeddingMatrix,
+  params,
+  outputDim
+}) {
+  let model = tf.sequential()
+  model.add(
+    tf.layers.embedding({
+      inputDim: vocabularyLength,
+      outputDim: params.EMBEDDING_DIM,
+      inputLength: params.MAX_SEQ_LENGTH,
+      trainable: params.TRAINABLE_EMBEDDINGS,
+      weights: [embeddingMatrix]
+    })
+  )
+
+  model.add(
+    // tf.layers.bidirectional({
+    //   layer: tf.layers.lstm({
+    //     units: params.UNITS,
+    //     dropout: params.DROPOUT_REG,
+    //     recurrentDropout: params.DROPOUT_REG
+    //   })
+    // })
+    tf.layers.lstm({
+      units: params.UNITS,
+      dropout: params.DROPOUT_REG,
+      recurrentDropout: params.DROPOUT_REG
+    })
+  )
+  model.add(
+    tf.layers.dense({
+      units: outputDim,
+      activation: 'softmax'
+    })
+  )
+  return model
+}
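
A hypothetical end-to-end use of the class above, assuming the directory layout the CLI command passes in (src/nlu, with models stored under MODELS_DIRNAME) and that the package entry point re-exports BotonicNLU:

import path from 'path'
import { BotonicNLU } from '@botonic/nlu' // assumed re-export of the class above

async function main() {
  const nluPath = path.join(process.cwd(), 'src', 'nlu')
  const nlu = new BotonicNLU()
  await nlu.train({ nluPath }) // trains one model per language found in the dev data

  const models = await nlu.loadModels({ modelsPath: path.join(nluPath, 'models') })
  const { intent, entities } = nlu.predict(models, 'I want to fly to Madrid')
  console.log(intent, entities)
}

main()
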
Lines changed: 53 additions & 0 deletions
@@ -0,0 +1,53 @@
+// Execution
+export const LANG_FLAG = '--lang'
+// Filenames
+export const NLU_DATA_FILENAME = 'nlu-data.json'
+export const MODEL_FILENAME = 'model.json'
+// Dirnames
+export const MODELS_DIRNAME = 'models'
+export const UTTERANCES_DIRNAME = 'utterances'
+// Subpaths
+export const NLU_DIRNAME = 'nlu'
+export const NLU_CONFIG_FILENAME = 'nlu.config.json'
+export const GLOBAL_CONFIG_DIRNAME = '.botonic'
+export const WORD_EMBEDDINGS_DIRNAME = 'word-embeddings'
+
+// General Config
+export const UTTERANCES_EXTENSION = '.txt'
+export const ASSETS_DIRNAME = 'assets'
+export const UNKNOWN_TOKEN = '<UNK>'
+export const DB = {
+  TABLE: 'embeddings',
+  COLUMN: 'token'
+}
+export const WORD_EMBEDDDINGS_ENDPOINT =
+  'https://s3-eu-west-1.amazonaws.com/word-embeddings.hubtype.com'
+
+// Entities
+export const ENTITIES_REGEX = /\[(.*?)\]\((.*?)\)/
+export const GLOBAL_ENTITIES_REGEX = /\[(.*?)\]\((.*?)\)/g
+export const DEFAULT_ENTITIES = [
+  // Nouns
+  'Organization',
+  'Currency',
+  'Unit',
+  // Places
+  'Country',
+  'Region',
+  'Place',
+  'City',
+  // Dates
+  'WeekDay',
+  'Date',
+  'Holiday',
+  'Month',
+  'Duration',
+  'Time',
+  // People
+  'FirstName',
+  'LastName',
+  'MaleName',
+  'FemaleName',
+  'Honorific',
+  'Person'
+]
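
ENTITIES_REGEX and GLOBAL_ENTITIES_REGEX match a Markdown-link style markup, which suggests utterances tag entities as [text](EntityType); that convention is inferred from the pattern, not documented in this diff. A small illustration of extracting the pairs:

const GLOBAL_ENTITIES_REGEX = /\[(.*?)\]\((.*?)\)/g

const utterance = 'fly from [Barcelona](City) to [Madrid](City)'
const tags: { value: string; type: string }[] = []
let m: RegExpExecArray | null
while ((m = GLOBAL_ENTITIES_REGEX.exec(utterance)) !== null) {
  tags.push({ value: m[1], type: m[2] })
}
// tags => [ { value: 'Barcelona', type: 'City' }, { value: 'Madrid', type: 'City' } ]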
