Skip to content
This repository was archived by the owner on Jan 7, 2025. It is now read-only.

Support for flac and opus for batch transcription #62

Merged
merged 3 commits into from
Feb 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/golangci-lint.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
steps:
- uses: actions/setup-go@v3
with:
go-version: 1.17
go-version: 1.18
- uses: actions/checkout@v3
- name: golangci-lint
uses: golangci/golangci-lint-action@v3
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
steps:
- uses: actions/setup-go@v3
with:
go-version: 1.17
go-version: 1.18
- uses: actions/checkout@v3
- name: golangci-lint
uses: golangci/golangci-lint-action@v3
Expand All @@ -24,7 +24,7 @@ jobs:
- name: Install Go
uses: actions/setup-go@v3
with:
go-version: 1.17
go-version: 1.18
- uses: actions/checkout@v3
- name: build
run: make build
Expand All @@ -41,7 +41,7 @@ jobs:
fetch-depth: 0
- uses: actions/setup-go@v3
with:
go-version: 1.17
go-version: 1.18
- uses: docker/login-action@v2
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
- name: Install Go
uses: actions/setup-go@v3
with:
go-version: 1.17
go-version: 1.18
- uses: actions/checkout@v3
- name: build
run: make build
Expand Down
48 changes: 31 additions & 17 deletions cmd/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ func readAudioCorpus(filename string) ([]AudioCorpusItem, error) {
return nil, err
}
ac := make([]AudioCorpusItem, 0)
if strings.HasSuffix(filename, "wav") {
if strings.HasSuffix(filename, "wav") || strings.HasSuffix(filename, "opus") || strings.HasSuffix(filename, "flac") {
return []AudioCorpusItem{{Audio: filename}}, nil
}
jd := json.NewDecoder(f)
Expand Down Expand Up @@ -325,15 +325,19 @@ func transcribeWithBatchAPI(ctx context.Context, appID string, corpusPath string
audioFilePath = corpusPath
}

err = readAudio(audioFilePath, aci, func(buffer audio.IntBuffer, n int) error {
buffer16 := make([]uint16, len(buffer.Data))
for i, x := range buffer.Data {
buffer16[i] = uint16(x)
}
buf := new(bytes.Buffer)
err = binary.Write(buf, binary.LittleEndian, buffer16)
if err != nil {
return fmt.Errorf("binary.Write: %v", err)
f, err := os.Open(audioFilePath)
if err != nil {
barClearOnError(bar)
return nil, err
}
buf := make([]byte, 65536)
for {
n, err := f.Read(buf)
if err == io.EOF {
break
} else if err != nil {
barClearOnError(bar)
return nil, err
}

err = paStream.Send(&sluv1.ProcessAudioRequest{
Expand All @@ -343,16 +347,14 @@ func transcribeWithBatchAPI(ctx context.Context, appID string, corpusPath string
Channels: 1,
SampleRateHertz: 16000,
},
Source: &sluv1.ProcessAudioRequest_Audio{Audio: buf.Bytes()},
Source: &sluv1.ProcessAudioRequest_Audio{Audio: buf[:n]},
})

if err != nil {
return fmt.Errorf("sending %d process audio request failed: %w", buf.Len(), err)
barClearOnError(bar)
return nil, err
}
return nil
})
if err != nil {
barClearOnError(bar)
return nil, err

}

err = bar.Add(1)
Expand All @@ -362,6 +364,10 @@ func transcribeWithBatchAPI(ctx context.Context, appID string, corpusPath string
}

paResp, err := paStream.CloseAndRecv()
if err != nil {
barClearOnError(bar)
return nil, err
}
bID := paResp.GetOperation().GetId()
pending[bID] = aci
}
Expand All @@ -375,6 +381,7 @@ func transcribeWithBatchAPI(ctx context.Context, appID string, corpusPath string
var results []AudioCorpusItem

bar = getBar("Transcribing", "utt", inputSize)
isDone := false
for {
for bID, aci := range pending {
status, err := client.QueryStatus(ctx, &sluv1.QueryStatusRequest{Id: bID})
Expand All @@ -386,6 +393,13 @@ func transcribeWithBatchAPI(ctx context.Context, appID string, corpusPath string
case sluv1.Operation_STATUS_DONE:
trs := status.GetOperation().GetTranscripts()
words := make([]string, len(trs))
if !isDone && len(trs) == 0 {
// Results might not be available immediately after the done state is reached, so if there
// are none yet, wait a bit before moving on. The isDone flag limits this wait to one occurrence.
isDone = true
time.Sleep(2 * time.Second)
continue
}
for i, tr := range trs {
words[i] = tr.Word
}
Expand Down
57 changes: 27 additions & 30 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,48 +1,45 @@
module github.com/speechly/cli

go 1.17
go 1.18

require (
github.com/agnivade/levenshtein v1.1.1
github.com/go-audio/audio v1.0.0
github.com/go-audio/wav v1.1.0
github.com/mattn/go-isatty v0.0.16
github.com/mattn/go-isatty v0.0.17
github.com/mitchellh/go-homedir v1.1.0
github.com/schollz/progressbar/v3 v3.11.0
github.com/speechly/api/go v0.0.0-20220920060221-2531f4783d08
github.com/speechly/nwalgo v0.0.0-20221109101309-d1a337619dd3
github.com/spf13/cobra v1.3.0
github.com/spf13/viper v1.10.0
golang.org/x/text v0.4.0
google.golang.org/grpc v1.42.0
google.golang.org/protobuf v1.27.1
github.com/schollz/progressbar/v3 v3.13.0
github.com/speechly/api/go v0.0.0-20230221135950-6d68efe6ac91
github.com/speechly/nwalgo v0.0.0-20221109110948-f6606115e74b
github.com/spf13/cobra v1.6.1
github.com/spf13/pflag v1.0.5
github.com/spf13/viper v1.15.0
golang.org/x/text v0.7.0
google.golang.org/grpc v1.53.0
google.golang.org/protobuf v1.28.1
)

require (
github.com/cpuguy83/go-md2man/v2 v2.0.1 // indirect
github.com/fsnotify/fsnotify v1.5.1 // indirect
github.com/fsnotify/fsnotify v1.6.0 // indirect
github.com/go-audio/riff v1.0.0 // indirect
github.com/golang/protobuf v1.5.2 // indirect
github.com/hashicorp/hcl v1.0.0 // indirect
github.com/inconshreveable/mousetrap v1.0.0 // indirect
github.com/kr/text v0.2.0 // indirect
github.com/magiconair/properties v1.8.5 // indirect
github.com/mattn/go-runewidth v0.0.13 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/magiconair/properties v1.8.7 // indirect
github.com/mattn/go-runewidth v0.0.14 // indirect
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
github.com/mitchellh/mapstructure v1.4.3 // indirect
github.com/pelletier/go-toml v1.9.4 // indirect
github.com/rivo/uniseg v0.2.0 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/spf13/afero v1.6.0 // indirect
github.com/spf13/cast v1.4.1 // indirect
github.com/mitchellh/mapstructure v1.5.0 // indirect
github.com/pelletier/go-toml/v2 v2.0.6 // indirect
github.com/rivo/uniseg v0.4.4 // indirect
github.com/spf13/afero v1.9.4 // indirect
github.com/spf13/cast v1.5.0 // indirect
github.com/spf13/jwalterweatherman v1.1.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/subosito/gotenv v1.2.0 // indirect
golang.org/x/net v0.0.0-20220722155237-a158d28d115b // indirect
golang.org/x/sys v0.0.0-20220829200755-d48e67d00261 // indirect
golang.org/x/term v0.0.0-20220722155259-a9ba230a4035 // indirect
google.golang.org/genproto v0.0.0-20211208223120-3a66f561d7aa // indirect
github.com/subosito/gotenv v1.4.2 // indirect
golang.org/x/net v0.7.0 // indirect
golang.org/x/sys v0.5.0 // indirect
golang.org/x/term v0.5.0 // indirect
google.golang.org/genproto v0.0.0-20230223222841-637eb2293923 // indirect
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect
gopkg.in/ini.v1 v1.66.2 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/ini.v1 v1.67.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
Loading