-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathsnowboy.go
282 lines (252 loc) · 7.48 KB
/
snowboy.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
package snowboy
import (
"errors"
"io"
"strconv"
"strings"
"time"
"unsafe"
"github.com/Kitt-AI/snowboy/swig/Go"
)
// snowboyResult is the status code produced by a detection run.
//
// Negative values and zero are the sentinel codes declared below; a positive
// value N identifies the N-th installed hotword (1-based), which route maps
// to handlers[N-1].
type snowboyResult int

// Every constant carries the snowboyResult type explicitly. With an explicit
// "= value" on each line the type of the first constant is NOT inherited, so
// leaving the type off silently declares untyped integer constants.
const (
	// snowboyResultSilence means only silence was found in the audio.
	snowboyResultSilence snowboyResult = -2
	// snowboyResultError means the library reported an error.
	snowboyResultError snowboyResult = -1
	// snowboyResultNoDetection means audio was present but no hotword matched.
	snowboyResultNoDetection snowboyResult = 0
)
// Errors returned when routing a detection result.
var (
// NoHandler is returned when a hotword index is reported for which no
// handler has been installed.
NoHandler = errors.New("No handler installed")
// SnowboyLibraryError is returned when a detection run yields the error
// result code.
SnowboyLibraryError = errors.New("snowboy library error")
)
// Detector holds the context and base implementation for snowboy audio
// detection.
//
// The underlying snowboy object is created lazily by initialize from
// ResourceFile and the models registered through Handle.
type Detector struct {
// raw is the underlying snowboy detector created by initialize.
raw snowboydetect.SnowboyDetect
// initialized reports whether raw has been created and not yet closed.
initialized bool
// handlers holds the hotword handlers in registration order; a positive
// detection result N is dispatched to handlers[N-1].
handlers []handlerKeyword
// silenceHandler, when non-nil, is called once silenceElapsed reaches
// silenceThreshold.
silenceHandler *handlerKeyword
// modelStr is the comma-separated list of registered model filenames.
modelStr string
// sensitivityStr is the comma-separated list of sensitivities, in the
// same order as modelStr.
sensitivityStr string
// silenceThreshold is how much silence must accumulate before the
// silence handler is called.
silenceThreshold time.Duration
// silenceElapsed is the silence accumulated so far; it is reset whenever
// a non-silence result is seen or the silence handler fires.
silenceElapsed time.Duration
// ResourceFile is the resource file handed to the snowboy library.
ResourceFile string
// AudioGain is the gain applied to the detector when it is initialized.
AudioGain float32
}
// NewDetector builds a Detector that will load the given resources file.
//
// The returned Detector has an AudioGain of 1.0; every other field is left
// at its zero value.
func NewDetector(resourceFile string) Detector {
	var detector Detector
	detector.ResourceFile = resourceFile
	detector.AudioGain = 1.0
	return detector
}
// AudioFormat reports the audio input format expected by the detector.
//
// It initializes the detector if needed and returns, in order, the sample
// rate, the number of channels, and the bit depth.
func (d *Detector) AudioFormat() (sampleRate, numChannels, bitsPerSample int) {
	d.initialize()
	return d.raw.SampleRate(), d.raw.NumChannels(), d.raw.BitsPerSample()
}
// Close releases the underlying snowboy detector.
//
// Clients must call Close on a Detector once they are done detecting. An
// error is returned when the Detector was never initialized (or has already
// been closed).
func (d *Detector) Close() error {
	if !d.initialized {
		return errors.New("snowboy not initialize")
	}
	d.initialized = false
	snowboydetect.DeleteSnowboyDetect(d.raw)
	return nil
}
// Detect runs detection over data and calls the matching installed handler.
//
// The data is not chunked because all of it is already available: the whole
// slice is assumed to be filled, and at most one handler is called.
func (d *Detector) Detect(data []byte) error {
	d.initialize()
	result := d.runDetection(data)
	return d.route(result)
}
// Handle installs handler for the given hotword.
//
// The hotword's model and its sensitivity (formatted with two decimals) are
// appended to the comma-separated lists kept on the Detector, and the handler
// is stored together with the hotword's name.
func (d *Detector) Handle(hotword Hotword, handler Handler) {
	sensitivity := strconv.FormatFloat(float64(hotword.Sensitivity), 'f', 2, 64)
	if len(d.handlers) == 0 {
		d.modelStr += hotword.Model
		d.sensitivityStr += sensitivity
	} else {
		d.modelStr += "," + hotword.Model
		d.sensitivityStr += "," + sensitivity
	}
	entry := handlerKeyword{Handler: handler, keyword: hotword.Name}
	d.handlers = append(d.handlers, entry)
}
// HandleFunc installs a plain function as the handler for the given hotword,
// for callers that do not want to implement the Handler interface.
func (d *Detector) HandleFunc(hotword Hotword, handler func(string)) {
	var adapted Handler = handlerFunc(handler)
	d.Handle(hotword, adapted)
}
// HandleSilence installs the handler that is called when silence is detected.
//
// threshold determines how long silence may last before the handler is
// called. The handler is invoked with the keyword "silence".
func (d *Detector) HandleSilence(threshold time.Duration, handler Handler) {
	silence := handlerKeyword{keyword: "silence", Handler: handler}
	d.silenceHandler = &silence
	d.silenceThreshold = threshold
}
// HandleSilenceFunc installs a plain function as the silence handler, for
// callers that do not want to implement the Handler interface.
//
// threshold determines how long silence may last before the function is
// called.
func (d *Detector) HandleSilenceFunc(threshold time.Duration, handler func(string)) {
	var adapted Handler = handlerFunc(handler)
	d.HandleSilence(threshold, adapted)
}
// ReadAndDetect reads from data and calls the previously installed handlers
// whenever a detection occurs.
//
// It blocks while reading from data in chunks of up to 2048 bytes. Data can
// be a file, a pipe from the Stdout of an exec, the body of an http response,
// or any other reader.
//
// Only the bytes actually returned by each Read are passed to detection, so
// a short read never re-processes stale audio left in the buffer from an
// earlier read. Note that detection consumes 16-bit samples, so a trailing
// odd byte of a chunk is ignored.
//
// ReadAndDetect returns nil once data reports io.EOF, the read error for any
// other read failure, or the first error produced while routing a result.
//
// Note: be careful when using a bytes.Buffer for data. As soon as io.EOF is
// received from a Read call, ReadAndDetect exits.
func (d *Detector) ReadAndDetect(data io.Reader) error {
	d.initialize()
	buf := make([]byte, 2048)
	for {
		n, err := data.Read(buf)
		// The io.Reader contract allows n > 0 together with a non-nil err,
		// so consume what was read before looking at err.
		if n > 0 {
			if routeErr := d.route(d.runDetection(buf[:n])); routeErr != nil {
				return routeErr
			}
		}
		if err == io.EOF {
			return nil
		}
		if err != nil {
			return err
		}
		if n == 0 {
			// No data to read yet, but not EOF, so wait and try later.
			time.Sleep(300 * time.Millisecond)
		}
	}
}
// Reset resets the detection state and returns the value reported by the
// underlying snowboy object.
//
// The underlying snowboy object handles voice activity detection (VAD)
// internally, but if you are using an external VAD you should call Reset
// whenever you see a segment end.
func (d *Detector) Reset() bool {
	d.initialize()
	ok := d.raw.Reset()
	return ok
}
// SetAudioGain applies a fixed gain to the input audio. If you have a very
// weak microphone, use it to boost the input audio level.
//
// The gain is applied to the underlying detector immediately and is also
// stored in AudioGain. initialize re-applies AudioGain whenever the detector
// is created, so without storing it a detector that is closed and then used
// again would silently fall back to the previous gain.
func (d *Detector) SetAudioGain(gain float32) {
	d.initialize()
	d.raw.SetAudioGain(gain)
	d.AudioGain = gain
}
// NumNotwords returns the number of hotwords loaded into the detector,
// initializing the detector first if needed.
//
// The method name is a misspelling of "NumHotwords"; it is part of the
// public API and is kept as is.
func (d *Detector) NumNotwords() int {
	d.initialize()
	count := d.raw.NumHotwords()
	return count
}
// ApplyFrontend turns the frontend audio processing on (apply == true) or
// off (apply == false), creating the detector first if it does not exist yet.
func (d *Detector) ApplyFrontend(apply bool) {
	// initialize is a no-op once the detector exists, so it only needs to be
	// called while the detector is still uninitialized.
	if !d.initialized {
		d.initialize()
	}
	d.raw.ApplyFrontend(apply)
}
// initialize lazily creates the underlying snowboy detector.
//
// It does nothing when the detector already exists. Otherwise it builds the
// detector from ResourceFile and the registered models, then applies the
// registered sensitivities and AudioGain. It panics when no model has been
// registered.
func (d *Detector) initialize() {
	switch {
	case d.initialized:
		return
	case d.modelStr == "":
		panic(errors.New("no models set for detector"))
	}
	d.raw = snowboydetect.NewSnowboyDetect(d.ResourceFile, d.modelStr)
	d.raw.SetSensitivity(d.sensitivityStr)
	d.raw.SetAudioGain(d.AudioGain)
	d.initialized = true
}
// route dispatches a detection result to the matching handler.
//
// An error result yields SnowboyLibraryError. A silence result calls the
// silence handler (and resets the accumulated silence) once the accumulated
// silence has reached the threshold and a silence handler is installed. A
// "no detection" result does nothing. Any other result N calls the N-th
// installed hotword handler, or yields NoHandler when fewer than N handlers
// are installed.
func (d *Detector) route(result snowboyResult) error {
	switch result {
	case snowboyResultError:
		return SnowboyLibraryError
	case snowboyResultSilence:
		if d.silenceHandler != nil && d.silenceElapsed >= d.silenceThreshold {
			d.silenceElapsed = 0
			d.silenceHandler.call()
		}
	case snowboyResultNoDetection:
		// Nothing was detected, so there is nothing to dispatch.
	default:
		index := int(result)
		if index > len(d.handlers) {
			return NoHandler
		}
		d.handlers[index-1].call()
	}
	return nil
}
// runDetection feeds data to the underlying detector and returns its result.
//
// data is handed over as 16-bit samples, so len(data)/2 samples are
// processed. Empty data yields snowboyResultNoDetection without touching the
// detector.
//
// When the result is silence, the playback duration of data (derived from
// the detector's audio format) is added to the accumulated silence; any
// other result resets the accumulated silence.
func (d *Detector) runDetection(data []byte) snowboyResult {
	if len(data) == 0 {
		return snowboyResultNoDetection
	}
	ptr := snowboydetect.SwigcptrInt16_t(unsafe.Pointer(&data[0]))
	result := snowboyResult(d.raw.RunDetection(ptr, len(data)/2 /* len of int16 */))
	if result != snowboyResultSilence {
		// Reset silence elapse duration because non-silence was detected.
		d.silenceElapsed = 0
		return result
	}

	sampleRate, numChannels, bitDepth := d.AudioFormat()
	// Compute in 64 bits: len(data) * time.Second does not fit in an int on
	// platforms where int is 32 bits wide.
	bytesPerSecond := int64(numChannels) * int64(bitDepth/8) * int64(sampleRate)
	if bytesPerSecond > 0 {
		// Skipped for a degenerate audio format to avoid dividing by zero.
		elapsed := int64(len(data)) * int64(time.Second) / bytesPerSecond
		d.silenceElapsed += time.Duration(elapsed)
	}
	return result
}
// A Handler is notified when a keyword is detected.
//
// Detected is called with the keyword string the handler was installed for.
type Handler interface {
Detected(string)
}
// handlerKeyword pairs a Handler with the keyword that is reported to it.
type handlerKeyword struct {
	Handler
	keyword string
}

// call notifies the embedded Handler, passing along the stored keyword.
func (hk handlerKeyword) call() {
	hk.Detected(hk.keyword)
}
// handlerFunc adapts an ordinary function to the Handler interface.
type handlerFunc func(string)

// Detected calls the wrapped function with keyword.
func (fn handlerFunc) Detected(keyword string) {
	fn(keyword)
}
// A Hotword represents a model filename and sensitivity for a snowboy
// detectable word.
type Hotword struct {
// Model is the filename for the .umdl file.
Model string
// Sensitivity is the sensitivity of this specific hotword.
Sensitivity float32
// Name is what will be used in calls to Handler.Detected(string).
Name string
}
// NewDefaultHotword creates a Hotword from a model filename alone.
//
// It delegates to NewHotword with a sensitivity of 0.5, so the hotword name
// is derived from the model filename.
func NewDefaultHotword(model string) Hotword {
	const defaultSensitivity = 0.5
	return NewHotword(model, defaultSensitivity)
}
// NewHotword creates a Hotword from a model filename and a sensitivity.
//
// The hotword name is derived from the model filename: everything up to and
// including the last "/" is dropped, and a trailing ".umdl" or ".pmdl"
// extension is removed. For example "resources/model.umdl" yields the name
// "model".
func NewHotword(model string, sensitivity float32) Hotword {
	name := model
	if slash := strings.LastIndex(name, "/"); slash >= 0 {
		name = name[slash+1:]
	}
	// Remove the extension as a suffix. strings.TrimRight would treat
	// ".umdl" as a set of characters and also eat trailing letters of the
	// name itself (turning "model.umdl" into "mode").
	for _, ext := range []string{".umdl", ".pmdl"} {
		if strings.HasSuffix(name, ext) {
			name = strings.TrimSuffix(name, ext)
			break
		}
	}
	return Hotword{
		Model:       model,
		Sensitivity: sensitivity,
		Name:        name,
	}
}