(beta): Release v1.0.0b0 of the Python SDK (elevenlabs#196)

Bit-o-lodeon-Ai · Mar 12, 2024 · 53b732a · 53b732a
1 parent 2e12f6d
commit 53b732a
Show file tree

Hide file tree

Showing 131 changed files with 8,673 additions and 4,600 deletions.
diff --git a/.fernignore b/.fernignore
@@ -1,14 +1,9 @@
 # Specify files that shouldn't be modified by Fern
 
-src/elevenlabs/__init__.py
-src/elevenlabs/model.py
 src/elevenlabs/client.py
 src/elevenlabs/play.py
-src/elevenlabs/tts.py
-src/elevenlabs/voice.py
-src/elevenlabs/generate.py
-src/elevenlabs/clone.py
-src/elevenlabs/resources/voices/client.py
+src/elevenlabs/realtime_tts.py
+
 .github/workflows/ci.yml
 
 README.md

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
diff --git a/README.md b/README.md
@@ -20,8 +20,8 @@ Check out the [HTTP API documentation](https://elevenlabs.io/docs/api-reference)
 pip install elevenlabs==0.3.0b0
 ```
 
-## V3 Migration Guide
-> The SDK was rewritten in v3 and is now programatically generated from our OpenAPI spec. As part of this release 
+## v0.x to v1.x Migration Guide
+> The SDK was rewritten in v1 and is now programmatically generated from our OpenAPI spec. As part of this release 
 > there are some breaking changes. 
 
 
@@ -32,7 +32,9 @@ endpoints in our API.
 ```python
 from elevenlabs.client import ElevenLabs
 
-client = ElevenLabs(api_key="...")
+client = ElevenLabs(
+  api_key="..." # Defaults to ELEVEN_API_KEY
+)
 ```
 As part of this change, there is no longer a `set_api_key` and `get_api_key` method exported. 
 
@@ -43,7 +45,10 @@ a synchronous client. Note that you can pass in your own httpx client as well.
 ```python
 from elevenlabs.client import AsyncElevenLabs
 
-client = AsyncElevenLabs(api_key="...", httpx=httpx.AsyncClient(...))
+client = AsyncElevenLabs(
+  api_key="...",  # Defaults to ELEVEN_API_KEY
+  httpx=httpx.AsyncClient(...)
+)
 ```
 
 ### Removing Static Methods
@@ -61,9 +66,29 @@ The renames are specified below:
   `History.from_api()` -> `client.history.get_all()` 
 
 
-### Maintaining Helper Methods
-The SDK continues to export methods for `generate`, `play`, `clone`, and
-`voices` which are detailed in the README below. 
+### Exported functions
+The SDK no longer exports the top-level functions `generate`, `clone`, and `voices`. Instead, 
+everything is now directly attached to the client instance. 
+
+#### `generate` -> `client.generate`
+
+The generate method is a helper function that makes it easier to consume the 
+text-to-speech APIs. If you'd rather access the raw APIs, simply use `client.text_to_speech`. 
+
+#### `clone` -> `client.clone`
+
+The clone method is a helper function that wraps the voices add and 
+get APIs. If you'd rather access the raw APIs, simply use `client.voices.add()`. 
+
+#### `voices` -> `client.voices.get_all()` 
+
+To get all your voices, use `client.voices.get_all()`. 
+
+#### `play` and `stream`
+
+The SDK continues to export the `play` and `stream` methods. Under the hood, these methods
+use ffmpeg and mpv to play audio streams. 
+
 
 ## 🗣️ Usage
 [![Open in Spaces](https://img.shields.io/badge/🤗-Open%20in%20Spaces-blue.svg)](https://huggingface.co/spaces/elevenlabs/tts)
@@ -72,15 +97,18 @@ The SDK continues to export methods for `generate`, `play`, `clone`, and
 We support two main models: the newest `eleven_multilingual_v2`, a single foundational model supporting 29 languages including English, Chinese, Spanish, Hindi, Portuguese, French, German, Japanese, Arabic, Korean, Indonesian, Italian, Dutch, Turkish, Polish, Swedish, Filipino, Malay, Russian, Romanian, Ukrainian, Greek, Czech, Danish, Finnish, Bulgarian, Croatian, Slovak, and Tamil; and `eleven_monolingual_v1`, a low-latency model specifically trained for English speech.
 
 ```py
-from elevenlabs import generate, play
+from elevenlabs import play
+from elevenlabs.client import ElevenLabs
 
-audio = generate(
-  # api_key="YOUR_API_KEY", (Defaults to os.getenv(ELEVEN_API_KEY))
+client = ElevenLabs(
+  api_key="YOUR_API_KEY", # Defaults to ELEVEN_API_KEY
+)
+
+audio = client.generate(
   text="Hello! 你好! Hola! नमस्ते! Bonjour! こんにちは! مرحبا! 안녕하세요! Ciao! Cześć! Привіт! வணக்கம்!",
   voice="Rachel",
   model="eleven_multilingual_v2"
 )
-
 play(audio)
 ```
 
@@ -96,33 +124,35 @@ play(audio)
 
 List all your available voices with `client.voices.get_all()`.
 ```py
-from elevenlabs import voices, generate
+from elevenlabs import play
+from elevenlabs.client import ElevenLabs
+
+client = ElevenLabs(
+  api_key="YOUR_API_KEY", # Defaults to ELEVEN_API_KEY
+)
 
-voices = voices()
-audio = generate(text="Hello there!", voice=voices[0])
+response = client.voices.get_all()
+audio = client.generate(text="Hello there!", voice=response.voices[0])
 print(response.voices)
 ```
 
 <details> <summary> Show output </summary>
 
 ```py
-Voices(
-    voices=[
-        Voice(
-            voice_id='21m00Tcm4TlvDq8ikWAM',
-            name='Rachel',
-            category='premade',
-            settings=None,
-        ),
-        Voice(
-            voice_id='AZnzlk1XvdvUeBnXmlld',
-            name='Domi',
-            category='premade',
-            settings=None,
-        ),
-        ...
-    ]
-)
+[
+  Voice(
+      voice_id='21m00Tcm4TlvDq8ikWAM',
+      name='Rachel',
+      category='premade',
+      settings=None,
+  ),
+  Voice(
+      voice_id='AZnzlk1XvdvUeBnXmlld',
+      name='Domi',
+      category='premade',
+      settings=None,
+  ),
+]
 ```
 
 </details>
@@ -131,7 +161,12 @@ Build a voice object with custom settings to personalize the voice style, or cal
 `client.voices.get_settings("your-voice-id")` to get the default settings for the voice.
 
 ```py
-from elevenlabs import Voice, VoiceSettings, generate
+from elevenlabs import Voice, VoiceSettings, play
+from elevenlabs.client import ElevenLabs
+
+client = ElevenLabs(
+  api_key="YOUR_API_KEY", # Defaults to ELEVEN_API_KEY
+)
 
 audio = client.generate(
     text="Hello! My name is Bella.",
@@ -151,16 +186,20 @@ play(audio)
 Clone your voice in an instant. Note that voice cloning requires an API key, see below.
 
 ```py
-from elevenlabs import clone, generate, play
+from elevenlabs.client import ElevenLabs
+from elevenlabs import play
 
-voice = clone(
-    # api_key="YOUR_API_KEY", (Defaults to os.getenv(ELEVEN_API_KEY))
+client = ElevenLabs(
+  api_key="YOUR_API_KEY", # Defaults to ELEVEN_API_KEY
+)
+
+voice = client.clone(
     name="Alex",
     description="An old American male voice with a slight hoarseness in his throat. Perfect for news", # Optional
     files=["./sample_0.mp3", "./sample_1.mp3", "./sample_2.mp3"],
 )
 
-audio = generate(text="Hi! I'm a cloned voice!", voice=voice)
+audio = client.generate(text="Hi! I'm a cloned voice!", voice=voice)
 
 play(audio)
 ```
@@ -170,28 +209,39 @@ play(audio)
 Stream audio in real-time, as it's being generated.
 
 ```py
-from elevenlabs import generate, stream
+from elevenlabs.client import ElevenLabs
+from elevenlabs import stream
+
+client = ElevenLabs(
+  api_key="YOUR_API_KEY", # Defaults to ELEVEN_API_KEY
+)
 
-audio_stream = generate(
-  # api_key="YOUR_API_KEY", (Defaults to os.getenv(ELEVEN_API_KEY))
+audio_stream = client.generate(
   text="This is a... streaming voice!!",
   stream=True
 )
 
 stream(audio_stream)
 ```
 
+Note that `generate` is a helper function. If you'd like to access
+the raw method, simply use `client.text_to_speech.convert_as_stream`. 
+
 ### Input streaming
 Stream text chunks into audio as it's being generated, with <1s latency. Note: if chunks don't end with space or punctuation (" ", ".", "?", "!"), the stream will wait for more text.
 ```py
-from elevenlabs import generate, stream
+from elevenlabs.client import ElevenLabs
+from elevenlabs import stream
+
+client = ElevenLabs(
+  api_key="YOUR_API_KEY", # Defaults to ELEVEN_API_KEY
+)
 
 def text_stream():
     yield "Hi there, I'm Eleven "
     yield "I'm a text to speech API "
 
-audio_stream = generate(
-    # api_key="YOUR_API_KEY", (Defaults to os.getenv(ELEVEN_API_KEY))
+audio_stream = client.generate(
     text=text_stream(),
     voice="Nicole",
     model="eleven_monolingual_v1",
@@ -201,18 +251,9 @@ audio_stream = generate(
 stream(audio_stream)
 ```
 
-## HTTP Client
-The SDK also exposes an HTTP client that you can use to query our 
-various endpoints. 
+Note that `generate` is a helper function. If you'd like to access
+the raw method, simply use `client.text_to_speech.convert_realtime`. 
 
-```python
-from elevenlabs.client import ElevenLabs
-
-eleven = ElevenLabs(
-  api_key="MY_API_KEY" # Defaults to ELEVEN_API_KEY
-)
-models = eleven.models.get_all()
-```
 
 ## Async Client 
 Use `AsyncElevenLabs` if you want to make API calls asynchronously.