Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions src/app/applicationcommands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,21 @@ int runOnce(QGuiApplication &app, const AppConfig &config, double seconds)
{
AudioRecorder recorder(config.audio);
const std::shared_ptr<const TranscriptionEngine> transcriptionEngine = createTranscriptionEngine(config.transcriber);
std::unique_ptr<TranscriptionSession> transcriber = transcriptionEngine->createSession();
RuntimeError runtimeError;
const std::shared_ptr<const TranscriptionModelHandle> model = transcriptionEngine->loadModel(&runtimeError);
if (model == nullptr) {
qCCritical(appLog) << "Failed to load transcription model:" << runtimeError.message;
return 1;
}

std::unique_ptr<TranscriptionSession> transcriber = transcriptionEngine->createSession(model);
if (transcriber == nullptr) {
qCCritical(appLog) << "Failed to create transcription session";
return 1;
}
ClipboardWriter clipboardWriter(QGuiApplication::clipboard());

if (config.transcriber.warmupOnStart) {
RuntimeError runtimeError;
if (!transcriber->warmup(&runtimeError)) {
qCCritical(appLog) << "Failed to warm up transcriber:" << runtimeError.message;
return 1;
Expand Down
2 changes: 2 additions & 0 deletions src/service.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@ QJsonObject MutterkeyService::diagnostics() const
object.insert(QStringLiteral("transcriptions_completed"), m_transcriptionsCompleted);
object.insert(QStringLiteral("transcriber_backend"),
m_transcriptionWorker != nullptr ? m_transcriptionWorker->backendName() : QStringLiteral("unconfigured"));
object.insert(QStringLiteral("transcriber_model"),
m_transcriptionWorker != nullptr ? m_transcriptionWorker->loadedModelDescription() : QString());
const BackendCapabilities capabilities =
m_transcriptionWorker != nullptr ? m_transcriptionWorker->capabilities() : m_transcriptionEngine->capabilities();
object.insert(QStringLiteral("transcriber_runtime"), capabilities.runtimeDescription);
Expand Down
10 changes: 8 additions & 2 deletions src/transcription/transcriptionengine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,15 @@ class WhisperCppTranscriptionEngine final : public TranscriptionEngine
return WhisperCppTranscriber::capabilitiesStatic();
}

[[nodiscard]] std::unique_ptr<TranscriptionSession> createSession() const override
[[nodiscard]] std::shared_ptr<const TranscriptionModelHandle> loadModel(RuntimeError *error) const override
{
return std::make_unique<WhisperCppTranscriber>(m_config);
return WhisperCppTranscriber::loadModelHandle(m_config, error);
}

/**
 * @brief Creates a session bound to an already loaded model.
 *
 * Delegates to WhisperCppTranscriber::createSession with the engine's stored
 * configuration.
 *
 * @param model Shared model handle; taken by value and moved into the session factory.
 * @return Newly created session as returned by the factory.
 */
[[nodiscard]] std::unique_ptr<TranscriptionSession>
createSession(std::shared_ptr<const TranscriptionModelHandle> model) const override
{
    std::unique_ptr<TranscriptionSession> session =
        WhisperCppTranscriber::createSession(m_config, std::move(model));
    return session;
}

private:
Expand Down
54 changes: 51 additions & 3 deletions src/transcription/transcriptionengine.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,37 @@ struct Recording;
* @brief Stable engine/session boundary for embedded transcription backends.
*/

/**
 * @brief Immutable interface to a model that a transcription engine has loaded.
 *
 * A handle owns validated backend assets. It may be shared by several
 * independent sessions, and it keeps backend-specific state hidden from
 * application code.
 */
class TranscriptionModelHandle
{
protected:
    /// Construction is restricted to derived classes.
    TranscriptionModelHandle() = default;

public:
    virtual ~TranscriptionModelHandle() = default;

    // Handles can be neither copied nor moved.
    TranscriptionModelHandle(const TranscriptionModelHandle &) = delete;
    TranscriptionModelHandle(TranscriptionModelHandle &&) = delete;
    TranscriptionModelHandle &operator=(const TranscriptionModelHandle &) = delete;
    TranscriptionModelHandle &operator=(TranscriptionModelHandle &&) = delete;

    /**
     * @brief Identifies the backend that produced this loaded model.
     * @return Short backend name used in diagnostics.
     */
    [[nodiscard]] virtual QString backendName() const = 0;

    /**
     * @brief Describes the loaded model in human-readable form.
     * @return Diagnostic model description such as the resolved model path.
     */
    [[nodiscard]] virtual QString modelDescription() const = 0;
};

/**
* @brief Mutable per-session transcription interface.
*
Expand Down Expand Up @@ -47,6 +78,14 @@ class TranscriptionSession
*/
[[nodiscard]] virtual TranscriptionResult transcribe(const Recording &recording) = 0;

/**
 * @brief Requests cooperative cancellation of any active decode.
 *
 * Implementations should stop in-flight backend work best-effort without
 * using thread interruption.
 *
 * NOTE(review): presumably a cancelled decode is reported through
 * RuntimeErrorCode::Cancelled — confirm against the backend implementation.
 * NOTE(review): whether this may be called from a thread other than the one
 * running transcribe() is not stated here — confirm before relying on it.
 */
virtual void cancel() = 0;

protected:
TranscriptionSession() = default;
};
Expand All @@ -73,10 +112,19 @@ class TranscriptionEngine
[[nodiscard]] virtual BackendCapabilities capabilities() const = 0;

/**
 * @brief Loads an immutable validated model handle for this engine.
 * @param error Optional destination for a structured failure reason.
 * @return Shared loaded-model handle suitable for multiple sessions; callers
 *         treat a null pointer as a load failure.
 */
[[nodiscard]] virtual std::shared_ptr<const TranscriptionModelHandle> loadModel(RuntimeError *error = nullptr) const = 0;

/**
 * @brief Creates a new isolated transcription session from a loaded model.
 * @param model Shared immutable model handle created by this engine.
 * @return Newly constructed session that owns only mutable backend state;
 *         callers check the result for null before using it.
 */
[[nodiscard]] virtual std::unique_ptr<TranscriptionSession>
createSession(std::shared_ptr<const TranscriptionModelHandle> model) const = 0;

protected:
TranscriptionEngine() = default;
Expand Down
1 change: 1 addition & 0 deletions src/transcription/transcriptiontypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
*/
enum class RuntimeErrorCode : std::uint8_t {
None,
Cancelled,
InvalidConfig,
ModelNotFound,
ModelLoadFailed,
Expand Down
58 changes: 56 additions & 2 deletions src/transcription/transcriptionworker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,27 @@ BackendCapabilities TranscriptionWorker::capabilities() const
return m_capabilities;
}

/**
 * @brief Reports the description of the model handle the worker currently holds.
 * @return The handle's model description, or an empty string when no model is loaded.
 */
QString TranscriptionWorker::loadedModelDescription() const
{
    return m_model != nullptr ? m_model->modelDescription() : QString();
}

/**
 * @brief Ensures a session exists and asks it to warm up.
 *
 * When warmup fails, the session is dropped if the reported error is one that
 * shouldDiscardSession() flags, so that the next call builds a fresh session.
 *
 * @param error Optional output for the failure reason.
 * @return true when the session reported a successful warmup.
 */
bool TranscriptionWorker::warmup(RuntimeError *error)
{
    if (!ensureSession(error)) {
        return false;
    }

    const bool ready = m_transcriber->warmup(error);
    // Without an error object the failure cannot be classified, so the
    // session is discarded unconditionally in that case.
    if (!ready && (error == nullptr || shouldDiscardSession(*error))) {
        m_transcriber.reset();
    }

    return ready;
}

void TranscriptionWorker::transcribe(const Recording &recording)
Expand All @@ -57,6 +71,9 @@ void TranscriptionWorker::transcribe(const Recording &recording)

const TranscriptionResult result = m_transcriber->transcribe(recording);
if (!result.success) {
if (shouldDiscardSession(result.error)) {
m_transcriber.reset();
}
emit transcriptionFailed(result.error);
return;
}
Expand All @@ -70,6 +87,10 @@ bool TranscriptionWorker::ensureSession(RuntimeError *error)
return true;
}

if (!ensureModel(error)) {
return false;
}

if (m_engine == nullptr) {
if (error != nullptr) {
*error = makeRuntimeError(RuntimeErrorCode::InternalRuntimeError,
Expand All @@ -78,7 +99,7 @@ bool TranscriptionWorker::ensureSession(RuntimeError *error)
return false;
}

m_transcriber = m_engine->createSession();
m_transcriber = m_engine->createSession(m_model);
if (m_transcriber == nullptr) {
if (error != nullptr) {
*error = makeRuntimeError(RuntimeErrorCode::InternalRuntimeError,
Expand All @@ -88,3 +109,36 @@ bool TranscriptionWorker::ensureSession(RuntimeError *error)
}
return true;
}

/**
 * @brief Loads the shared model handle on first use.
 * @param error Output for a load failure, forwarded to the engine's loadModel().
 * @return true when a model is already held, when there is no engine to load
 *         from, or when loading produced a handle; false when loading failed.
 */
bool TranscriptionWorker::ensureModel(RuntimeError *error)
{
    const bool alreadyLoaded = m_model != nullptr;
    const bool engineMissing = m_engine == nullptr;

    // A missing engine is not reported here: ensureSession() checks for it
    // right after this call and produces the error itself.
    if (alreadyLoaded || engineMissing) {
        return true;
    }

    m_model = m_engine->loadModel(error);
    return static_cast<bool>(m_model);
}

/**
 * @brief Decides whether a failed operation should cause the session to be dropped.
 * @param error Error reported by the failed operation.
 * @return false for None, InvalidConfig, ModelNotFound, AudioNormalizationFailed
 *         and UnsupportedLanguage; true for every other code, including values
 *         outside the listed enumerators.
 */
bool TranscriptionWorker::shouldDiscardSession(const RuntimeError &error)
{
    switch (error.code) {
    // These codes keep the current session.
    case RuntimeErrorCode::None:
    case RuntimeErrorCode::InvalidConfig:
    case RuntimeErrorCode::ModelNotFound:
    case RuntimeErrorCode::AudioNormalizationFailed:
    case RuntimeErrorCode::UnsupportedLanguage:
        return false;

    // These codes drop the session.
    case RuntimeErrorCode::Cancelled:
    case RuntimeErrorCode::ModelLoadFailed:
    case RuntimeErrorCode::DecodeFailed:
    case RuntimeErrorCode::InternalRuntimeError:
        break;
    }

    // Reached for the discard group above and for any unlisted value.
    return true;
}
11 changes: 11 additions & 0 deletions src/transcription/transcriptionworker.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,12 @@ class TranscriptionWorker final : public QObject
*/
[[nodiscard]] BackendCapabilities capabilities() const;

/**
* @brief Returns the currently loaded model description, if available.
* @return Human-readable loaded model description or an empty string.
*/
[[nodiscard]] QString loadedModelDescription() const;

/**
* @brief Eagerly initializes backend state before the first real transcription.
* @param error Optional output for warmup failures.
Expand Down Expand Up @@ -78,10 +84,15 @@ class TranscriptionWorker final : public QObject
void transcriptionFailed(const RuntimeError &error);

private:
static bool shouldDiscardSession(const RuntimeError &error);

bool ensureModel(RuntimeError *error = nullptr);
bool ensureSession(RuntimeError *error = nullptr);

/// Shared immutable engine used to create the live session lazily on the worker thread.
std::shared_ptr<const TranscriptionEngine> m_engine;
/// Shared immutable loaded model handle reused across session instances.
std::shared_ptr<const TranscriptionModelHandle> m_model;
/// Capability snapshot reported even before the first session exists.
BackendCapabilities m_capabilities;
/// Owned transcription backend implementation.
Expand Down
Loading
Loading