@@ -19,8 +19,6 @@ If bundler is not being used to manage dependencies, install the gem by executin
1919Usage
2020-----
2121
22- NOTE: This gem is still in development. API is not stable for now.
23-
2422``` ruby
2523require " whisper"
2624
@@ -33,9 +31,6 @@ params.duration = 60_000
3331params.max_text_tokens = 300
3432params.translate = true
3533params.print_timestamps = false
36- params.new_segment_callback = -> (output, t0, t1, index) {
37- puts " segment #{ index } : #{ t0 } ms -> #{ t1 } ms: #{ output } "
38- }
3934
4035whisper.transcribe(" path/to/audio.wav" , params) do |whole_text |
4136 puts whole_text
@@ -59,5 +54,57 @@ There are some types of models. See [models][] page for details.
5954
6055Currently, whisper.cpp accepts only 16-bit WAV files.
6156
57+ ### API ###
58+
59+ Once `Whisper::Context#transcribe` has been called, you can retrieve the segments with `#each_segment`:
60+
61+ ``` ruby
62+ def format_time (time_ms )
63+ sec, decimal_part = time_ms.divmod(1000 )
64+ min, sec = sec.divmod(60 )
65+ hour, min = min.divmod(60 )
66+ " %02d:%02d:%02d.%03d" % [hour, min, sec, decimal_part]
67+ end
68+
69+ whisper.transcribe(" path/to/audio.wav" , params)
70+
71+ whisper.each_segment.with_index do |segment , index |
72+ line = " [%{nth}: %{st} --> %{ed}] %{text}" % {
73+ nth: index + 1 ,
74+ st: format_time(segment.start_time),
75+ ed: format_time(segment.end_time),
76+ text: segment.text
77+ }
78+ line << " (speaker turned)" if segment.speaker_next_turn?
79+ puts line
80+ end
81+
82+ ```
83+
84+ You can also add a hook to params that is called each time a new segment is produced:
85+
86+ ``` ruby
87+ def format_time (time_ms )
88+ sec, decimal_part = time_ms.divmod(1000 )
89+ min, sec = sec.divmod(60 )
90+ hour, min = min.divmod(60 )
91+ " %02d:%02d:%02d.%03d" % [hour, min, sec, decimal_part]
92+ end
93+
94+ # Add hook before calling #transcribe
95+ params.on_new_segment do |segment |
96+ line = " [%{st} --> %{ed}] %{text}" % {
97+ st: format_time(segment.start_time),
98+ ed: format_time(segment.end_time),
99+ text: segment.text
100+ }
101+ line << " (speaker turned)" if segment.speaker_next_turn?
102+ puts line
103+ end
104+
105+ whisper.transcribe(" path/to/audio.wav" , params)
106+
107+ ```
108+
62109[ whisper.cpp ] : https://github.com/ggerganov/whisper.cpp
63110[ models ] : https://github.com/ggerganov/whisper.cpp/tree/master/models
0 commit comments