readium · JayPanoz · Nov 13, 2025 · Nov 19, 2025 · Nov 20, 2025 · Nov 20, 2025
diff --git a/.github/workflows/gh-pages.yml b/.github/workflows/gh-pages.yml
@@ -40,10 +40,12 @@ jobs:
           ls -la ./
           ls -laR ./build
           ls -laR ./demo
+          ls -laR ./json
           mkdir build-demo
           cp README.md ./build-demo/
           cp -r ./build ./build-demo/
           cp -r ./demo ./build-demo/
+          cp -r ./json ./build-demo/
           ls -laR ./build-demo
 
 

diff --git a/.github/workflows/validate-json.yml b/.github/workflows/validate-json.yml
@@ -0,0 +1,32 @@
+name: Validate JSON Schema
+
+on:
+  push:
+    branches:
+      - main
+      - develop
+    paths:
+      - "json/**/*.json"
+  pull_request:
+    branches:
+      - main
+      - develop
+    paths:
+      - "json/**/*.json"
+  workflow_dispatch:
+
+jobs:
+  validate-json:
+    name: Validate JSON Schema
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Validate JSON against schema
+        uses: dsanders11/json-schema-validate-action@v1.4.0
+        with:
+          schema: "./voices.schema.json"
+          files: |
+            json/**/*.json
+            !json/localizedNames/**
diff --git a/README.md b/README.md
@@ -17,165 +17,252 @@ Readium Speech was spun out as a separate project in order to facilitate its int
 
 ## Current focus
 
-For our initial work on this project, we're focusing on voice selection based on [recommended voices](https://github.com/HadrienGardeur/web-speech-recommended-voices).
+For our initial work on this project, we focused on voice selection based on [recommended voices](https://github.com/HadrienGardeur/web-speech-recommended-voices).
 
 The outline of this work has been explored in a [GitHub discussion](https://github.com/HadrienGardeur/web-speech-recommended-voices/discussions/9) and through a [best practices document](https://github.com/HadrienGardeur/read-aloud-best-practices/blob/main/voice-selection.md).
 
-## Demo
+In the second phase, we focused on implementing a WebSpeech API-based solution with an architecture designed for future extensibility:
 
-[A live demo](https://readium.org/speech/demo/) of the voice selection API is available.
+- **Engine Layer**: Core TTS functionality through `ReadiumSpeechPlaybackEngine`
+- **Navigator Layer**: Content and playback management via a (temporary) `ReadiumSpeechNavigator`
+- **Current Implementation**: WebSpeech API with cross-browser compatibility
+- **Future-Proof Design**: Architecture prepared for additional TTS service adapters
 
-It demonstrates the following features:
+Key features include advanced voice selection, cross-browser playback control, flexible content loading, and comprehensive event handling for UI feedback. The architecture is designed to be extensible for different TTS backends while maintaining TypeScript-first development practices.
+
+## Demos
+
+Two live demos are available:
+
+1. [Voice selection with playback demo](https://readium.org/speech/demo)
+2. [In-context demo](https://readium.org/speech/demo/article)
+
+The first demo showcases the following features:
 
 - fetching a list of all available languages, translating them to the user's locale and sorting them based on these translations
 - returning a list of voices for a given language, grouped by region and sorted based on quality
 - filtering languages and voices based on gender and offline availability
 - using embedded test utterances to demo voices
+- using the current Navigator for playback control
+
+The second demo focuses on in-context reading with seamless voice selection (grouped by region and sorted based on quality) and playback control, providing an optional read-along experience that integrates naturally with the content.
 
 ## QuickStart
 
-At the moment, the new alpha version of the library is not published on npm, so you need to clone the repository and build it yourself.
+### Prerequisites
 
-```sh
-git clone https://github.com/readium/speech.git
-```
+- Node.js
+- npm
 
-```sh
-cd speech
-npm install
-npm run build
-```
+### Installation
 
-You can then link the library to your project, for example using `npm link`.
+1. Clone the repository:
+   ```bash
+   git clone https://github.com/readium/speech.git
+   cd speech
+   ```
 
-```typescript
-import { getVoices } from "readium-speech";
-console.log(getVoices);
+2. Install dependencies:
+   ```bash
+   npm install
+   ```
 
-const voices = await getVoices();
-console.log(voices);
+3. Build the package:
+   ```bash
+   npm run build
+   ```
 
-```
+4. Link the package locally (optional, for development):
+   ```bash
+   npm link
+   # Then in your project directory:
+   # npm link readium-speech
+   ```
 
 ### Basic Usage
 
-Here's how to get started with the Readium Speech library:
-
 ```typescript
-import { WebSpeechReadAloudNavigator } from "readium-speech";
+import { WebSpeechVoiceManager } from "readium-speech";
+
+async function setupVoices() {
+  try {
+    // Initialize the voice manager
+    const voiceManager = await WebSpeechVoiceManager.initialize();
+
+    // Get all available voices
+    const allVoices = voiceManager.getVoices();
+    console.log("Available voices:", allVoices);
+
+    // Get voices with filters
+    const filteredVoices = voiceManager.getVoices({
+      language: ["en", "fr"],
+      gender: "female",
+      quality: "high",
+      offlineOnly: true,
+      excludeNovelty: true,
+      excludeVeryLowQuality: true
+    });
+
+    // Get voices grouped by language
+    const voices = voiceManager.getVoices();
+    const groupedByLanguage = voiceManager.groupVoices(voices, "language");
+
+    // Get a test utterance for a specific language
+    const testText = voiceManager.getTestUtterance("en");
+
+  } catch (error) {
+    console.error("Error initializing voice manager:", error);
+  }
+}
 
-// Initialize the navigator with default WebSpeech engine
-const navigator = new WebSpeechReadAloudNavigator();
+await setupVoices();
+```
 
-// Load content to be read
-navigator.loadContent([
-  { text: "Hello, this is the first sentence.", language: "en-US" },
-  { text: "And this is the second sentence.", language: "en-US" }
-]);
+## API Reference
 
-// Set up event listeners
-navigator.on("start", () => console.log("Playback started"));
-navigator.on("end", () => console.log("Playback finished"));
+### Class: WebSpeechVoiceManager
 
-// Start playback
-navigator.play();
+The main class for managing Web Speech API voices with enhanced functionality.
 
-// Later, you can pause, resume, or stop
-// navigator.pause();
-// navigator.stop();
+#### Initialize the Voice Manager
 
-// Clean up when done
-// navigator.destroy();
+```typescript
+static initialize(maxTimeout?: number, interval?: number): Promise<WebSpeechVoiceManager>
 ```
 
-## Voices API
+Creates and initializes a new WebSpeechVoiceManager instance. This static factory method must be called to create an instance.
 
-### Interface 
+- `maxTimeout`: Maximum time in milliseconds to wait for voices to load (default: 10000ms)
+- `interval`: Interval in milliseconds between voice loading checks (default: 100ms)
+- Returns: Promise that resolves with a new WebSpeechVoiceManager instance
 
-```typescript
-export interface ReadiumSpeechVoices {
-  label: string;
-  voiceURI: string;
-  name: string;
-  language: string;
-  gender?: TGender | undefined;
-  age?: string | undefined;
-  offlineAvailability: boolean;
-  quality?: TQuality | undefined;
-  pitchControl: boolean;
-  recommendedPitch?: number | undefined;
-  recommendedRate?: number | undefined;
-}
+#### Get Available Voices
 
-export interface ILanguages {
-  label: string;
-  code: string;
-  count: number;
-}
+```typescript
+voiceManager.getVoices(options?: VoiceFilterOptions): ReadiumSpeechVoice[]
 ```
 
-#### Parse and Extract ReadiumSpeechVoices from speechSynthesis WebAPI
+Fetches all available voices that match the specified filter criteria.
 
 ```typescript
-function getVoices(preferredLanguage?: string[] | string, localization?: string): Promise<ReadiumSpeechVoices[]>
+interface VoiceFilterOptions {
+  language?: string | string[];  // Filter by language code(s) (e.g., "en", "fr")
+  gender?: TGender;  // "female" | "male" | "neutral"
+  quality?: TQuality | TQuality[];  // "veryLow" | "low" | "normal" | "high" | "veryHigh"
+  offlineOnly?: boolean;  // Only return voices available offline
+  provider?: string;  // Filter by voice provider
+  excludeNovelty?: boolean;  // Exclude novelty voices
+  excludeVeryLowQuality?: boolean;  // Exclude very low quality voices
+}
 ```
 
-#### List languages from ReadiumSpeechVoices
+#### Group Voices
 
 ```typescript
-function getLanguages(voices: ReadiumSpeechVoices[], preferredLanguage?: string[] | string, localization?: string | undefined): ILanguages[]
+voiceManager.groupVoices(voices: ReadiumSpeechVoice[], groupBy: "language" | "region" | "gender" | "quality" | "provider"): VoiceGroup
 ```
 
-#### helpers
-
-```typescript
-function listLanguages(voices: ReadiumSpeechVoices[], localization?: string): ILanguages[]
+Organizes voices into groups based on the specified criteria. The available grouping options are:
 
-function ListRegions(voices: ReadiumSpeechVoices[], localization?: string): ILanguages[]
+- `"language"`: Groups voices by their language code
+- `"region"`: Groups voices by their region
+- `"gender"`: Groups voices by gender
+- `"quality"`: Groups voices by quality level
+- `"provider"`: Groups voices by their provider
 
-function parseSpeechSynthesisVoices(speechSynthesisVoices: SpeechSynthesisVoice[]): ReadiumSpeechVoices[]
+#### Sort Voices
 
-function getSpeechSynthesisVoices(): Promise<SpeechSynthesisVoice[]>
+```typescript
+voiceManager.sortVoices(voices: ReadiumSpeechVoice[], options: SortOptions): ReadiumSpeechVoice[]
 ```
 
-#### groupBy
+Arranges voices according to the specified sorting criteria. The `SortOptions` interface allows you to sort by various properties and specify sort order.
 
 ```typescript
-function groupByKindOfVoices(allVoices: ReadiumSpeechVoices[]): TGroupVoices
-
-function groupByRegions(voices: ReadiumSpeechVoices[], language: string, preferredRegions?: string[] | string, localization?: string): TGroupVoices
-
-function groupByLanguage(voices: ReadiumSpeechVoices[], preferredLanguage?: string[] | string, localization?: string): TGroupVoices
+interface SortOptions {
+  by: "name" | "language" | "gender" | "quality" | "region";
+  order?: "asc" | "desc";
+}
 ```
 
-#### sortBy
+### Testing
+
+#### Get Test Utterance
 
 ```typescript
-function sortByLanguage(voices: ReadiumSpeechVoices[], preferredLanguage?: string[] | string): ReadiumSpeechVoices[]
+voiceManager.getTestUtterance(language: string): string
+```
 
-function sortByRegion(voices: ReadiumSpeechVoices[], preferredRegions?: string[] | string, localization?: string | undefined): ReadiumSpeechVoices[]
+Retrieves a sample text string suitable for testing text-to-speech functionality in the specified language. If no sample text is available for the specified language, it returns an empty string.
 
-function sortByGender(voices: ReadiumSpeechVoices[], genderFirst: TGender): ReadiumSpeechVoices[]
+### Interfaces
 
-function sortByName(voices: ReadiumSpeechVoices[]): ReadiumSpeechVoices[]
+#### `ReadiumSpeechVoice`
 
-function sortByQuality(voices: ReadiumSpeechVoices[]): ReadiumSpeechVoices[]
+```typescript
+interface ReadiumSpeechVoice {
+  // Core identification (required)
+  label: string;          // Human-friendly label for the voice
+  name: string;           // System/technical name (matches Web Speech API voiceURI)
+  voiceURI?: string;      // For Web Speech API compatibility
+
+  // Localization
+  language: string;       // BCP-47 language tag
+  localizedName?: TLocalizedName; // Localization pattern (android/apple)
+  altNames?: string[];     // Alternative names (mostly for Apple voices)
+  altLanguage?: string;    // Alternative BCP-47 language tag
+  otherLanguages?: string[]; // Other languages this voice can speak
+  multiLingual?: boolean;  // If voice can handle multiple languages
+
+  // Voice characteristics
+  gender?: TGender;       // Voice gender ("female" | "male" | "neutral")
+  children?: boolean;     // If this is a children's voice
+
+  // Quality and capabilities
+  quality?: TQuality[];    // Available quality levels for this voice ("veryLow" | "low" | "normal" | "high" | "veryHigh")
+  pitchControl?: boolean;  // Whether pitch can be controlled
+
+  // Performance settings
+  pitch?: number;         // Current pitch (0-2, where 1 is normal)
+  rate?: number;          // Speech rate (0.1-10, where 1 is normal)
+
+  // Platform and compatibility
+  browser?: string[];     // Supported browsers
+  os?: string[];          // Supported operating systems
+  preloaded?: boolean;    // If the voice is preloaded on the system
+  nativeID?: string | string[]; // Platform-specific voice ID(s)
+
+  // Additional metadata
+  note?: string;          // Additional notes about the voice
+  provider?: string;      // Voice provider (e.g., "Microsoft", "Google")
+
+  // Allow any additional properties that might be in the JSON
+  [key: string]: any;
+}
 ```
 
-#### filterOn
+#### `LanguageInfo`
 
 ```typescript
-function filterOnRecommended(voices: ReadiumSpeechVoices[], _recommended?: IRecommended[]): TReturnFilterOnRecommended
+interface LanguageInfo {
+  code: string;
+  label: string;
+  count: number;
+}
+```
 
-function filterOnVeryLowQuality(voices: ReadiumSpeechVoices[]): ReadiumSpeechVoices[]
+### Types
 
-function filterOnNovelty(voices: ReadiumSpeechVoices[]): ReadiumSpeechVoices[]
+#### `TQuality`
 
-function filterOnQuality(voices: ReadiumSpeechVoices[], quality: TQuality | TQuality[]): ReadiumSpeechVoices[]
+```typescript
+type TQuality = "veryLow" | "low" | "normal" | "high" | "veryHigh";
+```
 
-function filterOnLanguage(voices: ReadiumSpeechVoices[], language: string | string[]): ReadiumSpeechVoices[]
+#### `TGender`
 
-function filterOnGender(voices: ReadiumSpeechVoices[], gender: TGender): ReadiumSpeechVoices[]
+```typescript
+type TGender = "female" | "male" | "neutral";
 ```
 
 ## Playback API