Skip to content
13 changes: 6 additions & 7 deletions src/components/menu.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,7 @@ import { useTranslation } from 'react-i18next'

import homeStore from '@/features/stores/home'
import menuStore from '@/features/stores/menu'
import settingsStore, {
multiModalAIServiceKey,
} from '@/features/stores/settings'
import settingsStore from '@/features/stores/settings'
import slideStore from '@/features/stores/slide'
import { AssistantText } from './assistantText'
import { ChatLog } from './chatLog'
Expand All @@ -14,7 +12,7 @@ import Settings from './settings'
import { Webcam } from './webcam'
import Slides from './slides'
import Capture from './capture'
import { multiModalAIServices } from '@/features/stores/settings'
import { isCurrentModelMultiModal } from '@/features/utils/multimodal'

// モバイルデバイス検出用のカスタムフック
const useIsMobile = () => {
Expand Down Expand Up @@ -230,9 +228,10 @@ export const Menu = () => {
)}
</div>
{!youtubeMode &&
multiModalAIServices.includes(
selectAIService as multiModalAIServiceKey
) && (
// multiModalAIServices.includes(
// selectAIService as multiModalAIServiceKey
// )
isCurrentModelMultiModal() && (
<>
<div className="order-3">
<IconButton
Expand Down
7 changes: 5 additions & 2 deletions src/components/settings/modelProvider.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import { TextButton } from '../textButton'
import { useCallback } from 'react'
import Image from 'next/image'
import { Listbox } from '@headlessui/react'
import { multiModalAIServices } from '@/features/stores/settings'
import { multiModalModels } from '@/features/constants/aiModels'
import {
AudioModeInputType,
OpenAITTSVoice,
Expand Down Expand Up @@ -138,7 +138,10 @@ const ModelProvider = () => {
selectAIModel: defaultModels[newService],
})

if (!multiModalAIServices.includes(newService as any)) {
const newModel = defaultModels[newService]
const isMultiModal = multiModalModels.includes(newModel as any)

if (!isMultiModal) {
menuStore.setState({ showWebcam: false })

settingsStore.setState({
Expand Down
20 changes: 11 additions & 9 deletions src/components/settings/slide.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,12 @@ import { useTranslation } from 'react-i18next'
import { useEffect, useState } from 'react'
import Image from 'next/image'
import Link from 'next/link' // Link をインポート
import settingsStore, {
multiModalAIServices,
multiModalAIServiceKey,
} from '@/features/stores/settings'
import settingsStore from '@/features/stores/settings'
import menuStore from '@/features/stores/menu'
import slideStore from '@/features/stores/slide'
import { TextButton } from '../textButton'
import SlideConvert from './slideConvert'
import { isCurrentModelMultiModal } from '@/features/utils/multimodal'

const Slide = () => {
const { t } = useTranslation()
Expand Down Expand Up @@ -73,9 +71,10 @@ const Slide = () => {
<TextButton
onClick={toggleSlideMode}
disabled={
!multiModalAIServices.includes(
selectAIService as multiModalAIServiceKey
)
// !multiModalAIServices.includes(
// selectAIService as multiModalAIServiceKey
// )
!isCurrentModelMultiModal()
}
>
{slideMode ? t('StatusOn') : t('StatusOff')}
Expand Down Expand Up @@ -126,9 +125,12 @@ const Slide = () => {
</Link>
)}
</div>
{multiModalAIServices.includes(
{/* multiModalAIServices.includes(
selectAIService as multiModalAIServiceKey
) && <SlideConvert onFolderUpdate={handleFolderUpdate} />}
) && */}
{isCurrentModelMultiModal() && (
<SlideConvert onFolderUpdate={handleFolderUpdate} />
)}
</>
)}
</>
Expand Down
5 changes: 3 additions & 2 deletions src/components/settings/slideConvert.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@ import React, { useState, useEffect } from 'react'
import { useTranslation } from 'react-i18next'
import settingsStore, {
multiModalAIServiceKey,
multiModalAIServices,
} from '@/features/stores/settings'
import {
getDefaultModel,
getSlideConvertModels,
} from '@/features/constants/aiModels'
import { isCurrentModelMultiModal } from '@/features/utils/multimodal'
import { TextButton } from '../textButton'

interface SlideConvertProps {
Expand Down Expand Up @@ -43,7 +43,8 @@ const SlideConvert: React.FC<SlideConvertProps> = ({ onFolderUpdate }) => {
const handleFormSubmit = async (event: React.FormEvent) => {
event.preventDefault()

if (!multiModalAIServices.includes(aiService)) {
// if (!multiModalAIServices.includes(aiService)) {
if (!isCurrentModelMultiModal()) {
alert(t('InvalidAIService'))
return
}
Expand Down
5 changes: 3 additions & 2 deletions src/components/settings/youtube.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import menuStore from '@/features/stores/menu'
import settingsStore from '@/features/stores/settings'
import slideStore from '@/features/stores/slide'
import { TextButton } from '../textButton'
import { multiModalAIServices } from '@/features/stores/settings'
import { isCurrentModelMultiModal } from '@/features/utils/multimodal'

const YouTube = () => {
const youtubeApiKey = settingsStore((s) => s.youtubeApiKey)
Expand Down Expand Up @@ -113,7 +113,8 @@ const YouTube = () => {
})
}
disabled={
!multiModalAIServices.includes(selectAIService as any) ||
// !multiModalAIServices.includes(selectAIService as any) ||
!isCurrentModelMultiModal() ||
slideMode ||
externalLinkageMode
}
Expand Down
63 changes: 56 additions & 7 deletions src/features/constants/aiModels.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ export const aiModels: Record<AIService, string[]> = {
'llama-3.3-70b-versatile',
'llama3-8b-8192',
'mixtral-8x7b-32768',
'llama-3.1-8b',
'llama-3.1-70b',
],
cohere: [
'command-light',
Expand All @@ -61,17 +63,30 @@ export const aiModels: Record<AIService, string[]> = {
],
perplexity: [
'llama-3-sonar-large-32k-online',
'sonar-small-online',
'sonar-medium-online',
'sonar-large-online',
'llama-3.1-sonar-small-128k-online',
'llama-3.1-sonar-large-128k-online',
'llama-3.1-sonar-huge-128k-online',
'llama-3.1-sonar-small-128k-chat',
'llama-3.1-sonar-large-128k-chat',
],
fireworks: [
'accounts/fireworks/models/firefunction-v2',
'accounts/fireworks/models/llama-v3-8b',
'accounts/fireworks/models/llama-v3-70b',
'accounts/fireworks/models/llama-v3p1-405b-instruct',
'accounts/fireworks/models/llama-v3p1-70b-instruct',
'accounts/fireworks/models/llama-v3p1-8b-instruct',
'accounts/fireworks/models/mixtral-8x22b-instruct',
],
deepseek: ['deepseek-chat', 'deepseek-coder', 'deepseek-reasoner'],
openrouter: [],
openrouter: [
'anthropic/claude-3.5-sonnet',
'anthropic/claude-3-opus',
'google/gemini-pro-vision',
'meta-llama/llama-3.1-8b-instruction',
'meta-llama/llama-3.1-405b-instruct',
'mistralai/mistral-large-2',
],
lmstudio: [],
ollama: [],
dify: [],
Expand Down Expand Up @@ -122,10 +137,10 @@ export const defaultModels: Record<
google: 'gemini-1.5-flash-latest',
azure: '',
xai: 'grok-3',
groq: 'gemma2-9b-it',
cohere: 'command-r-plus',
groq: 'llama-3.3-70b-versatile',
cohere: 'command-r-plus-08-2024',
mistralai: 'mistral-large-latest',
perplexity: 'llama-3-sonar-large-32k-online',
perplexity: 'llama-3.1-sonar-large-128k-online',
fireworks: 'accounts/fireworks/models/firefunction-v2',
deepseek: 'deepseek-chat',
openrouter: 'anthropic/claude-3.5-sonnet',
Expand Down Expand Up @@ -236,7 +251,41 @@ export function getOpenAITTSModels(): string[] {
}

/**
 * Gemini model IDs grouped under the "Google Search grounding" name.
 *
 * NOTE(review): presumably the models for which search grounding may be
 * enabled — confirm against the consumers of this list.
 *
 * Declared `as const` so each ID keeps its literal type.
 */
export const googleSearchGroundingModels = [
'gemini-2.5-pro-preview-05-06',
'gemini-2.5-flash-preview-04-17',
'gemini-1.5-flash-latest',
'gemini-1.5-pro-latest',
'gemini-1.5-flash-8b-latest',
] as const

/**
 * Model IDs treated as multimodal (able to accept image input).
 *
 * Callers test the selected model ID against this list with `includes`, so
 * every entry must match the provider's model identifier exactly.
 *
 * Meta Llama 3.1 instruct models are intentionally not listed: Llama 3.1 is a
 * text-only family, and 'meta-llama/llama-3.1-8b-instruction' is not a valid
 * OpenRouter ID (the real suffix is `-instruct`). Listing them would enable
 * image features for models that cannot consume images.
 */
export const multiModalModels = [
  // OpenAI
  'gpt-4o-2024-11-20',
  'gpt-4.5-preview-2025-02-27',
  'gpt-4o-mini-2024-07-18',
  'chatgpt-4o-latest',
  'gpt-4.1-2025-04-14',
  'gpt-4.1-nano-2025-04-14',
  'gpt-4.1-mini-2025-04-14',
  // Anthropic
  'claude-3-5-sonnet-20241022',
  'claude-3-7-sonnet-20250219',
  'claude-3-opus-20240229',
  'claude-3-5-haiku-20241022',
  // Google
  'gemini-2.0-flash-001',
  'gemini-2.5-pro-preview-05-06',
  'gemini-2.5-flash-preview-04-17',
  'gemini-1.5-flash-latest',
  'gemini-1.5-flash-8b-latest',
  'gemini-1.5-pro-latest',
  // xAI
  'grok-3',
  'grok-2-vision-1212',
  // OpenRouter
  'anthropic/claude-3.5-sonnet',
  'anthropic/claude-3-opus',
  'google/gemini-pro-vision',
] as const

/** Union of the literal model IDs listed in `multiModalModels`. */
export type MultiModalModel = (typeof multiModalModels)[number]
5 changes: 3 additions & 2 deletions src/features/slide/slideAIHelpers.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import { getVercelAIChatResponse } from '@/features/chat/vercelAIChat'
import settingsStore, {
multiModalAIServiceKey,
multiModalAIServices,
} from '@/features/stores/settings'
import { isCurrentModelMultiModal } from '@/features/utils/multimodal'

export const judgeSlide = async (
queryText: string,
Expand All @@ -12,7 +12,8 @@ export const judgeSlide = async (
const ss = settingsStore.getState()
const aiService = ss.selectAIService as multiModalAIServiceKey

if (!multiModalAIServices.includes(aiService)) {
// if (!multiModalAIServices.includes(aiService)) {
if (!isCurrentModelMultiModal()) {
throw new Error('Invalid AI service')
}

Expand Down
2 changes: 2 additions & 0 deletions src/features/stores/settings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ export const multiModalAIServices = [
'anthropic',
'google',
'azure',
'xai',
'openrouter',
] as const
export type multiModalAIServiceKey = (typeof multiModalAIServices)[number]

Expand Down
13 changes: 13 additions & 0 deletions src/features/utils/multimodal.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import { multiModalModels } from '../constants/aiModels'
import settingsStore from '../stores/settings'

/**
 * Reports whether the currently selected AI model is multimodal-capable.
 *
 * Reads `selectAIModel` from a one-off `settingsStore.getState()` snapshot and
 * checks it for exact membership in `multiModalModels`.
 *
 * NOTE(review): this reads a snapshot instead of subscribing to the store, so
 * a React component calling it during render presumably will not re-render on
 * a model change by itself — confirm callers subscribe to the relevant state.
 *
 * @returns `true` if the selected model ID is listed in `multiModalModels`,
 * otherwise `false`.
 */
export const isCurrentModelMultiModal = (): boolean => {
  const { selectAIModel } = settingsStore.getState()

  // Widen the `as const` tuple to `readonly string[]` so an arbitrary model ID
  // can be looked up without resorting to an `any` cast.
  const supportedModels: readonly string[] = multiModalModels

  return supportedModels.includes(selectAIModel)
}
2 changes: 2 additions & 0 deletions src/pages/api/convertSlide.ts
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,8 @@ async function createSlideLine(
anthropic: () => createAnthropic({ apiKey }),
google: () => createGoogleGenerativeAI({ apiKey }),
azure: () => {},
xai: () => {},
openrouter: () => {},
}

const aiServiceInstance = aiServiceConfig[aiService as multiModalAIServiceKey]
Expand Down
16 changes: 12 additions & 4 deletions src/pages/api/services/vercelAi.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
import { Message } from '@/features/messages/messages'
import { createOpenAI } from '@ai-sdk/openai'
import { createAnthropic } from '@ai-sdk/anthropic'
import { createXai } from '@ai-sdk/xai'
// import { createXai } from '@ai-sdk/xai'
import { createGoogleGenerativeAI } from '@ai-sdk/google'
import { createCohere } from '@ai-sdk/cohere'
import { createMistral } from '@ai-sdk/mistral'
import { createAzure } from '@ai-sdk/azure'
import { createDeepSeek } from '@ai-sdk/deepseek'
import { createOpenAICompatible } from '@ai-sdk/openai-compatible'
import { createOllama } from 'ollama-ai-provider'
import { createOpenRouter } from '@openrouter/ai-sdk-provider'
// import { createOpenRouter } from '@openrouter/ai-sdk-provider'
import { streamText, generateText, CoreMessage } from 'ai'
import { VercelAIService } from '@/features/constants/settings'

Expand All @@ -27,7 +27,11 @@ export const aiServiceConfig: AIServiceConfig = {
resourceName,
apiKey,
}),
xai: ({ apiKey }) => createXai({ apiKey }),
// xAI is reached through the OpenAI-compatible provider. xAI documents
// https://api.x.ai/v1 as its API base URL; the previous host (api.xai.com)
// does not match it.
xai: ({ apiKey }) =>
  createOpenAI({
    baseURL: 'https://api.x.ai/v1',
    apiKey,
  }),
groq: ({ apiKey }) =>
createOpenAI({
baseURL: 'https://api.groq.com/openai/v1',
Expand All @@ -43,7 +47,11 @@ export const aiServiceConfig: AIServiceConfig = {
apiKey,
}),
deepseek: ({ apiKey }) => createDeepSeek({ apiKey }),
openrouter: ({ apiKey }) => createOpenRouter({ apiKey }),
openrouter: ({ apiKey }) =>
createOpenAI({
baseURL: 'https://openrouter.ai/api/v1',
apiKey,
}),
lmstudio: ({ baseURL }) =>
createOpenAICompatible({ name: 'lmstudio', baseURL }),
ollama: ({ baseURL }) => createOllama({ baseURL }),
Expand Down
15 changes: 14 additions & 1 deletion website/document/en/guide/ai/multimodal.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ Multimodal AI models are AI models that can understand and process multiple form

```bash
# Select an AI service that supports multimodal
# Multimodal supported: openai, anthropic, google, azure
# Multimodal supported: openai, anthropic, google, azure, xai, openrouter
NEXT_PUBLIC_SELECT_AI_SERVICE=openai

# Select a model that supports multimodal
Expand Down Expand Up @@ -47,6 +47,19 @@ AITuberKit supports the following multimodal-compatible AI services and models:

- Depends on settings in the Azure portal

### xAI

- grok-3
- grok-2-vision-1212

### OpenRouter

- anthropic/claude-3.5-sonnet
- anthropic/claude-3-opus
- google/gemini-pro-vision
- meta-llama/llama-3.1-8b-instruction
- meta-llama/llama-3.1-405b-instruct

## How to Use

To utilize multimodal features, follow these steps:
Expand Down
15 changes: 14 additions & 1 deletion website/document/guide/ai/multimodal.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

```bash
# マルチモーダル対応のAIサービスを選択
# マルチモーダル対応: openai, anthropic, google, azure
# マルチモーダル対応: openai, anthropic, google, azure, xai, openrouter
NEXT_PUBLIC_SELECT_AI_SERVICE=openai

# マルチモーダル対応のモデルを選択
Expand Down Expand Up @@ -47,6 +47,19 @@ AITuberKitでは、以下のマルチモーダル対応AIサービスおよび

- Azureポータルでの設定に依存

### xAI

- grok-3
- grok-2-vision-1212

### OpenRouter

- anthropic/claude-3.5-sonnet
- anthropic/claude-3-opus
- google/gemini-pro-vision
- meta-llama/llama-3.1-8b-instruction
- meta-llama/llama-3.1-405b-instruct

## 使用方法

マルチモーダル機能を活用するには、以下の手順に従ってください:
Expand Down