firebase · LyalinDotCom · Jul 4, 2025 · Jul 4, 2025 · pavelgj · Jul 7, 2025
diff --git a/js/plugins/googleai/README.md b/js/plugins/googleai/README.md
@@ -8,21 +8,133 @@ npm i --save @genkit-ai/googleai
 
 ## Using the plugin
 
+### Basic Setup
+
 ```ts
 import { genkit } from 'genkit';
-import { googleAI, gemini } from '@genkit-ai/googleai';
+import { googleAI } from '@genkit-ai/googleai';
 
 const ai = genkit({
   plugins: [googleAI()],
-  model: gemini('gemini-1.5-flash'),
+  model: googleAI.model('gemini-2.0-flash'),
+});
+```
+
+### Text Generation
+
+```ts
+// Simple text generation
+const { text } = await ai.generate('Explain quantum computing');
+console.log(text);
+
+// With a specific model
+const response = await ai.generate({
+  model: googleAI.model('gemini-1.5-pro'),
+  prompt: 'Write a haiku about coding',
 });
+```
+
+### Multimodal Generation
+
+```ts
+// With images
+const response = await ai.generate({
+  model: googleAI.model('gemini-2.0-flash'),
+  prompt: [
+    { text: 'What is in this image?' },
+    { media: { url: 'data:image/jpeg;base64,...' } }
+  ],
+});
+```
 
-async () => {
-  const { text } = ai.generate('hi Gemini!');
-  console.log(text);
-};
+### Image Generation
+
+```ts
+// Using Imagen
+const imageResponse = await ai.generate({
+  model: googleAI.model('imagen-3.0-generate-002'),
+  prompt: 'A serene mountain landscape at sunset',
+});
+
+// Using Imagen 4 (Preview)
+const imagen4Response = await ai.generate({
+  model: googleAI.model('imagen-4.0-generate-preview-06-06'),
+  prompt: 'A futuristic city with flying cars',
+});
+```
+
+### Video Generation
+
+```ts
+// Using Veo 2
+const videoResponse = await ai.generate({
+  model: googleAI.model('veo-2.0-generate-001'),
+  prompt: 'A time-lapse of clouds moving over a city skyline',
+  config: {
+    aspectRatio: '16:9',
+    durationSeconds: 8,
+  }
+});
+
+// Using Veo 3 (if available)
+const veo3Response = await ai.generate({
+  model: googleAI.model('veo-3.0-generate-003'),
+  prompt: 'Ocean waves crashing on a beach at sunset',
+  config: {
+    aspectRatio: '9:16',
+    durationSeconds: 5,
+  }
+});
 ```
 
+### Text-to-Speech
+
+```ts
+// Using Gemini TTS
+const audioResponse = await ai.generate({
+  model: googleAI.model('gemini-2.5-flash-preview-tts'),
+  prompt: 'Hello, welcome to our presentation.',
+});
+```
+
+### Native Audio (Conversational)
+
+```ts
+// Using native audio models
+const audioDialogResponse = await ai.generate({
+  model: googleAI.model('gemini-2.5-flash-preview-native-audio-dialog'),
+  prompt: 'Tell me a story about a brave knight',
+});
+```
+
+### Embeddings
+
+```ts
+// Text embeddings
+const embedding = await ai.embed({
+  embedder: googleAI.embedder('gemini-embedding-exp'),
+  content: 'The quick brown fox jumps over the lazy dog',
+});
+```
+
+### Using Fine-tuned Models
+
+```ts
+// Use your fine-tuned model
+const response = await ai.generate({
+  model: googleAI.model('tunedModels/your-model-id'),
+  prompt: 'Your prompt here',
+});
+```
+
+## Supported Models
+
+For a comprehensive list of all supported models with their capabilities and specifications, see [SUPPORTED_MODELS.md](./SUPPORTED_MODELS.md).
+
+The plugin uses dynamic model discovery, so new models released through the Gemini API are often supported automatically without requiring plugin updates.
+
+## Documentation
+
 The sources for this package are in the main [Genkit](https://github.com/firebase/genkit) repo. Please file issues and pull requests against that repo.
 
 Usage information and reference details can be found in [Genkit documentation](https://genkit.dev/docs/plugins/google-genai/).

diff --git a/js/plugins/googleai/SUPPORTED_MODELS.md b/js/plugins/googleai/SUPPORTED_MODELS.md
@@ -0,0 +1,48 @@
+# Supported Models - Google AI Plugin
+
+The `@genkit-ai/googleai` plugin connects to the Gemini API and is designed to be highly flexible. Because of this, Genkit supports nearly any generative or embedding model available through the API, including new and fine-tuned models, often without needing a plugin update.
+
+The following table lists many of the available models to help you get started. However, as the Gemini API evolves rapidly, this list may not be exhaustive. **For the most current and complete list of models, always refer to the official [Google AI Models documentation](https://ai.google.dev/gemini-api/docs/models).**
+
+## Text, Multimodal, and Live Models
+
+| Model Name | Code Reference | Capabilities | Notes | Status |
+| :--- | :--- | :--- | :--- | :--- |
+| **Gemini 2.5 Pro** | `googleAI.model('gemini-2.5-pro')` | Text, Vision, Audio, PDF | Enhanced thinking and reasoning. |
+| **Gemini 2.5 Flash** | `googleAI.model('gemini-2.5-flash')` | Text, Vision, Audio | Fast and versatile. |
+| **Gemini 2.5 Flash-Lite** | `googleAI.model('gemini-2.5-flash-lite-preview-06-17')` | Text, Vision, Audio | Cost-efficient, high throughput. | `Preview` |
+| **Gemini 2.5 Flash Live** | `googleAI.model('gemini-live-2.5-flash-preview')` | Bidirectional Voice & Video | For low-latency interactive sessions. | `Preview` |
+| **Gemini 2.0 Flash** | `googleAI.model('gemini-2.0-flash')` | Text, Vision, Audio | Next-gen features and speed. |
+| **Gemini 2.0 Flash Image Gen** | `googleAI.model('gemini-2.0-flash-preview-image-generation')` | Text, Vision, Image Generation | Conversational image generation. | `Preview` |
+| **Gemini 2.0 Flash-Lite** | `googleAI.model('gemini-2.0-flash-lite')` | Text, Vision, Audio | Cost-efficient and low latency. |
+| **Gemini 2.0 Flash Live** | `googleAI.model('gemini-2.0-flash-live-001')` | Bidirectional Voice & Video | For low-latency interactive sessions. |
+| **Gemini 1.5 Pro** | `googleAI.model('gemini-1.5-pro')` | Text, Vision, Audio | Complex reasoning tasks. |
+| **Gemini 1.5 Flash** | `googleAI.model('gemini-1.5-flash')` | Text, Vision, Audio | Fast performance for diverse tasks. |
+| **Gemini 1.5 Flash-8B** | `googleAI.model('gemini-1.5-flash-8b')` | Text, Vision, Audio | High-volume, lower intelligence tasks. |
+
+## Specialized Models (Audio, Image, Video, Embeddings)
+
+| Model Type | Model Name | Code Reference | Capabilities | Notes | Status |
+| :--- | :--- | :--- | :--- | :--- | :--- |
+| **Native Audio** | Gemini 2.5 Flash Native Audio (Dialog) | `googleAI.model('gemini-2.5-flash-preview-native-audio-dialog')` | Interleaved Text & Audio | Natural conversational audio. | `Preview` |
+| **Native Audio** | Gemini 2.5 Flash Native Audio (Thinking) | `googleAI.model('gemini-2.5-flash-exp-native-audio-thinking-dialog')` | Interleaved Text & Audio | Includes thinking audio cues. | `Experimental` |
+| **Text-to-Speech** | Gemini 2.5 Flash TTS | `googleAI.model('gemini-2.5-flash-preview-tts')` | Text-to-Speech | Low-latency audio generation. | `Preview` |
+| **Text-to-Speech** | Gemini 2.5 Pro TTS | `googleAI.model('gemini-2.5-pro-preview-tts')` | Text-to-Speech | High-quality audio generation. | `Preview` |
+| **Image Gen** | Imagen 4 | `googleAI.model('imagen-4.0-generate-preview-06-06')` | Image Generation | Latest image generation. | `Preview` |
+| **Image Gen** | Imagen 4 Ultra | `googleAI.model('imagen-4.0-ultra-generate-preview-06-06')` | Image Generation | Highest quality image generation. | `Preview` |
+| **Image Gen** | Imagen 3 | `googleAI.model('imagen-3.0-generate-002')` | Image Generation | High-quality images. |
+| **Video Gen** | Veo 2 | `googleAI.model('veo-2.0-generate-001')` | Video Generation | High-quality video generation. |
+| **Video Gen** | Veo 3 | `googleAI.model('veo-3.0-generate-003')` | Video Generation | Latest video generation model. | `If available` |
+| **Embedding** | Gemini Embedding | `googleAI.embedder('gemini-embedding-exp')` | Text Embedding | Measures relatedness of text. | `Experimental` |
+
+## Notes
+
+- Models marked as `Preview` or `Experimental` may have limited availability or be subject to changes.
+- Video generation models (Veo) return long-running operations that need to be polled for completion.
+- Some models have specific configuration options. Check the model's documentation for details.
+- The plugin uses dynamic model discovery, so new models are often supported automatically without plugin updates.
+- You can also use fine-tuned models by passing their ID: `googleAI.model('tunedModels/your-model-id')`.
+
+For usage examples and code samples, see the [README](./README.md).
+
+For the most up-to-date information about model capabilities, limitations, and pricing, please refer to the [official Google AI documentation](https://ai.google.dev/gemini-api/docs/models).
diff --git a/js/plugins/googleai/src/veo.ts b/js/plugins/googleai/src/veo.ts
@@ -31,7 +31,7 @@ import {
 import { getApiKeyFromEnvVar } from './common.js';
 import { Operation as ApiOperation, checkOp, predictModel } from './predict.js';
 
-export type KNOWN_VEO_MODELS = 'veo-2.0-generate-001';
+export type KNOWN_VEO_MODELS = 'veo-2.0-generate-001' | 'veo-3.0-generate-003';
 
 /**
  * See https://ai.google.dev/gemini-api/docs/video