-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
28c29eb
commit 2af520d
Showing
13 changed files
with
417 additions
and
33 deletions.
There are no files selected for viewing
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
# これは検証用のMDファイルです。 | ||
|
||
以下の秘密キーワードが読み取れているかを確認してください。 | ||
- 秘密キーワード: secret-keyword-for-md-file | ||
- これは秘密キーワードではありません: default-keyword-for-md-file | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
これは検証で使用するためのテキストファイルです。 | ||
読み取れていることを必ず確認すること。 | ||
ファイルの秘密キーワードは、「itc-api-assistant-txt-file-key」です。 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
aiohappyeyeballs==2.4.4 | ||
aiohttp==3.11.11 | ||
aiosignal==1.3.2 | ||
annotated-types==0.7.0 | ||
anyio==4.7.0 | ||
asgiref==3.8.1 | ||
attrs==24.3.0 | ||
backoff==2.2.1 | ||
bcrypt==4.2.1 | ||
build==1.2.2.post1 | ||
cachetools==5.5.0 | ||
certifi==2024.12.14 | ||
charset-normalizer==3.4.1 | ||
chroma-hnswlib==0.7.6 | ||
chromadb==0.5.23 | ||
click==8.1.8 | ||
coloredlogs==15.0.1 | ||
dataclasses-json==0.6.7 | ||
Deprecated==1.2.15 | ||
distro==1.9.0 | ||
durationpy==0.9 | ||
fastapi==0.115.6 | ||
filelock==3.16.1 | ||
flatbuffers==24.12.23 | ||
frozenlist==1.5.0 | ||
fsspec==2024.12.0 | ||
google-auth==2.37.0 | ||
googleapis-common-protos==1.66.0 | ||
greenlet==3.1.1 | ||
grpcio==1.68.1 | ||
h11==0.14.0 | ||
httpcore==1.0.7 | ||
httptools==0.6.4 | ||
httpx==0.28.1 | ||
httpx-sse==0.4.0 | ||
huggingface-hub==0.27.0 | ||
humanfriendly==10.0 | ||
idna==3.10 | ||
importlib_metadata==8.5.0 | ||
importlib_resources==6.4.5 | ||
iniconfig==2.0.0 | ||
jiter==0.8.2 | ||
jsonpatch==1.33 | ||
jsonpointer==3.0.0 | ||
kubernetes==31.0.0 | ||
langchain==0.3.13 | ||
langchain-community==0.3.13 | ||
langchain-core==0.3.28 | ||
langchain-openai==0.2.14 | ||
langchain-text-splitters==0.3.4 | ||
langsmith==0.2.6 | ||
markdown-it-py==3.0.0 | ||
marshmallow==3.23.2 | ||
mdurl==0.1.2 | ||
mmh3==5.0.1 | ||
monotonic==1.6 | ||
mpmath==1.3.0 | ||
multidict==6.1.0 | ||
mypy-extensions==1.0.0 | ||
numpy==1.26.4 | ||
oauthlib==3.2.2 | ||
onnxruntime==1.20.1 | ||
openai==1.58.1 | ||
opentelemetry-api==1.29.0 | ||
opentelemetry-exporter-otlp-proto-common==1.29.0 | ||
opentelemetry-exporter-otlp-proto-grpc==1.29.0 | ||
opentelemetry-instrumentation==0.50b0 | ||
opentelemetry-instrumentation-asgi==0.50b0 | ||
opentelemetry-instrumentation-fastapi==0.50b0 | ||
opentelemetry-proto==1.29.0 | ||
opentelemetry-sdk==1.29.0 | ||
opentelemetry-semantic-conventions==0.50b0 | ||
opentelemetry-util-http==0.50b0 | ||
orjson==3.10.12 | ||
overrides==7.7.0 | ||
packaging==24.2 | ||
pluggy==1.5.0 | ||
posthog==3.7.4 | ||
propcache==0.2.1 | ||
protobuf==5.29.2 | ||
pyasn1==0.6.1 | ||
pyasn1_modules==0.4.1 | ||
pydantic==2.10.4 | ||
pydantic-settings==2.7.0 | ||
pydantic_core==2.27.2 | ||
Pygments==2.18.0 | ||
PyPika==0.48.9 | ||
pyproject_hooks==1.2.0 | ||
pytest==8.3.4 | ||
pytest-asyncio==0.25.0 | ||
pytest-mock==3.14.0 | ||
python-dateutil==2.9.0.post0 | ||
python-dotenv==1.0.1 | ||
PyYAML==6.0.2 | ||
regex==2024.11.6 | ||
requests==2.32.3 | ||
requests-oauthlib==2.0.0 | ||
requests-toolbelt==1.0.0 | ||
rich==13.9.4 | ||
rsa==4.9 | ||
shellingham==1.5.4 | ||
six==1.17.0 | ||
sniffio==1.3.1 | ||
SQLAlchemy==2.0.36 | ||
starlette==0.41.3 | ||
sympy==1.13.3 | ||
tenacity==9.0.0 | ||
tiktoken==0.8.0 | ||
tokenizers==0.20.3 | ||
tqdm==4.67.1 | ||
typer==0.15.1 | ||
typing-inspect==0.9.0 | ||
typing_extensions==4.12.2 | ||
urllib3==2.3.0 | ||
uvicorn==0.34.0 | ||
uvloop==0.21.0 | ||
watchfiles==1.0.3 | ||
websocket-client==1.8.0 | ||
websockets==14.1 | ||
wrapt==1.17.0 | ||
yarl==1.18.3 | ||
zipp==3.21.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
OPENAI_API_KEY=<<ここにAPIキーを入力>> | ||
DOCS_PATH=<<ここにDOCSディレクトリのパスを入力>> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
from enum import Enum, auto | ||
from typing import List | ||
|
||
|
||
class EnvironmentalVariable(Enum):
    """Environment variables known to the application.

    The member *name* is what matters: callers pass ``member.name`` to
    ``os.getenv`` to look the variable up. The values are simply
    auto-assigned integers.
    """

    # API key for OpenAI.
    OPENAI_API_KEY = auto()
    # Path of the documents directory.
    DOCS_PATH = auto()
|
||
# https://platform.openai.com/docs/models/#current-model-aliases
class OpenAIModel(Enum):
    """OpenAI model identifiers selectable by the application.

    Each member's value is a model-name string (see the alias list linked
    above this class).
    """

    o1 = "o1"
    # NOTE(review): the member is named gpt_4o but maps to the
    # "chatgpt-4o-latest" alias — confirm this is the intended model.
    gpt_4o = "chatgpt-4o-latest"
|
||
|
||
class DocumentConfig:
    """Static settings describing which files are loaded and how.

    The attributes are plain class-level constants; nothing here is
    instantiated.
    """

    # Glob matching any file name that contains a dot, so multiple
    # extensions are supported.
    GLOB_PATTERN = "*.*"

    # Glob patterns for paths that must be skipped.
    EXCLUDE_PATTERN = [
        "**/.git/**",  # git metadata directories
        "**/__pycache__/**",  # Python bytecode caches
        "**/node_modules/**",  # Node.js modules
        "**/.env*",  # environment settings files
    ]

    # Base keyword arguments for the per-file loader.
    LOADER_KWARGS = {
        "autodetect_encoding": True,  # detect the encoding automatically
        "encoding": "utf-8",  # default encoding
    }

    # Keyword arguments for the DirectoryLoader itself.
    LOADER_CONFIG = {
        "recursive": True,  # also search sub-directories
        "silent_errors": True,  # ignore errors and keep going
        "load_hidden": False,  # do not read hidden files
        "use_multithreading": True,  # load with multiple threads
        "max_concurrency": 4,  # maximum number of threads
        "show_progress": True,  # display a progress bar
    }
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
import os | ||
from langchain_community.document_loaders import DirectoryLoader | ||
from langchain_community.document_loaders.text import TextLoader # TextLoaderを代わりに使用 | ||
|
||
from langchain.text_splitter import RecursiveCharacterTextSplitter | ||
from langchain_community.vectorstores import Chroma | ||
from langchain_openai import OpenAIEmbeddings | ||
from langchain_core.prompts import ChatPromptTemplate | ||
from langchain.chains import RetrievalQA | ||
|
||
from config import DocumentConfig | ||
|
||
|
||
class DocumentProcessor:
    """Load documents from a directory, index them, and answer questions.

    Expected call order:

    1. ``process_documents()`` builds the vector store.
    2. ``setup_qa_chain(model)`` builds the retrieval QA chain.
    3. ``await ask_question(question)`` queries the chain.

    Calling a later step before the earlier one raises ``ValueError``.
    """

    def __init__(self, docs_dir: str, chunk_size: int = 1000, chunk_overlap: int = 200):
        """Store the settings; no documents are read at construction time.

        Args:
            docs_dir: Directory that documents are loaded from.
            chunk_size: ``chunk_size`` passed to the text splitter.
            chunk_overlap: ``chunk_overlap`` passed to the text splitter.
        """
        self.docs_dir = docs_dir
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap
        self.vector_store = None
        # Initialised here so ask_question() can detect a missing chain and
        # raise its ValueError instead of failing with AttributeError.
        self.qa_chain = None

    def load_documents(self):
        """Load documents from ``docs_dir`` and print a summary of them.

        Returns:
            The list of loaded documents, or an empty list when loading or
            reporting raised an exception (the error is printed, not raised).
        """
        loader = DirectoryLoader(
            path=self.docs_dir,
            glob=DocumentConfig.GLOB_PATTERN,
            exclude=DocumentConfig.EXCLUDE_PATTERN,
            loader_cls=TextLoader,  # plain-text loader used for every file
            loader_kwargs=DocumentConfig.LOADER_KWARGS,
            **DocumentConfig.LOADER_CONFIG
        )

        try:
            documents = loader.load()
            print(f"\n読み込み完了: {len(documents)}個のドキュメント")
            print("\n読み込んだファイル:")

            # Count files per extension.
            format_count = {}
            for doc in documents:
                filepath = doc.metadata['source']
                # splitext only inspects the final path component, so a dot
                # in a directory name is never mistaken for an extension.
                ext = os.path.splitext(filepath)[1].lstrip('.').lower() or "(拡張子なし)"
                format_count[ext] = format_count.get(ext, 0) + 1
                print(f"- {filepath}")

            print("\nファイル形式の集計:")
            for ext, count in format_count.items():
                print(f"- {ext}: {count}ファイル")

            return documents

        except Exception as e:
            # Deliberate best-effort: report the failure and continue with
            # an empty document list.
            print(f"エラーが発生しました: {str(e)}")
            return []

    def process_documents(self):
        """Split the loaded documents into chunks and build the vector store.

        The resulting store is kept in ``self.vector_store``.
        """
        documents = self.load_documents()

        # Split the text into chunks of the configured size.
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=self.chunk_size,
            chunk_overlap=self.chunk_overlap
        )
        texts = text_splitter.split_documents(documents)
        print(f"{len(texts)}個のテキストチャンクを作成しました")

        # Build the vector store from the chunks.
        self.vector_store = Chroma.from_documents(
            documents=texts,
            embedding=OpenAIEmbeddings()
        )
        print("ベクトルストアを作成しました")

    def setup_qa_chain(self, model):
        """Build the retrieval QA chain on top of the vector store.

        Args:
            model: Language model handed to ``RetrievalQA.from_chain_type``.

        Raises:
            ValueError: If ``process_documents()`` has not been run yet.
        """
        # Explicit None check: the guard must not depend on the store's
        # truthiness (e.g. a length-based ``__bool__``).
        if self.vector_store is None:
            raise ValueError("先にprocess_documents()を実行してください")

        template = """以下の情報を元に、質問に答えてください:
コンテキスト: {context}
質問: {question}
回答は日本語で、できるだけ詳しく説明してください。"""

        prompt = ChatPromptTemplate.from_template(template)

        self.qa_chain = RetrievalQA.from_chain_type(
            model,
            retriever=self.vector_store.as_retriever(),
            chain_type_kwargs={"prompt": prompt}
        )

    async def ask_question(self, question: str) -> str:
        """Answer ``question`` using the QA chain.

        Args:
            question: The user's question, sent to the chain as ``query``.

        Returns:
            The ``result`` entry of the chain's response.

        Raises:
            ValueError: If ``setup_qa_chain()`` has not been run yet.
        """
        if self.qa_chain is None:
            raise ValueError("先にsetup_qa_chain()を実行してください")

        response = await self.qa_chain.ainvoke({"query": question})
        return response["result"]
|
||
|
||
def create_document_processor(docs_dir: str) -> DocumentProcessor:
    """Build a DocumentProcessor for ``docs_dir`` and run its processing step.

    Args:
        docs_dir: Directory handed to the ``DocumentProcessor`` constructor.

    Returns:
        The processor, after ``process_documents()`` has been called on it.
    """
    instance = DocumentProcessor(docs_dir)
    instance.process_documents()
    return instance
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
from langchain_core.messages import HumanMessage, SystemMessage | ||
import os | ||
from typing import Optional | ||
|
||
from config import EnvironmentalVariable as Ev | ||
|
||
|
||
def check_environment() -> tuple[bool, Optional[str]]:
    """Check that the OpenAI API key environment variable is set.

    Returns:
        ``(True, None)`` when the variable has a non-empty value, otherwise
        ``(False, message)`` with a message describing the problem.
    """
    # The enum member's name doubles as the environment variable name.
    if os.getenv(Ev.OPENAI_API_KEY.name):
        return True, None
    return False, "OpenAI APIキーが設定されていません"
|
||
|
||
def test_chat_completion(model) -> tuple[bool, Optional[str]]:
    """Send a short greeting to ``model`` and report whether it replied.

    Args:
        model: Object exposing ``invoke(messages)`` whose result has a
            ``content`` attribute.

    Returns:
        ``(True, reply_text)`` on a non-empty reply; ``(False, message)``
        when the reply is empty or any exception was raised.
    """
    try:
        reply = model.invoke(
            [
                SystemMessage(content="あなたは親切なアシスタントです。"),
                HumanMessage(content="こんにちは"),
            ]
        )
        text = reply.content
        if text:
            return True, text
        return False, "応答が空です"
    except Exception as exc:
        # Health-check boundary: report the failure instead of raising.
        return False, f"エラーが発生しました: {exc}"
|
||
|
||
def run_health_check(model) -> bool:
    """Run every health check, print a report, and return the overall result.

    Args:
        model: Chat model forwarded to ``test_chat_completion``.

    Returns:
        ``True`` only when both the environment check and the chat check
        succeeded.
    """
    print("健全性チェックを開始します...")

    # Environment variable check.
    env_ok, env_message = check_environment()
    print(f"\n1. 環境変数チェック: {'✓' if env_ok else '✗'}")
    if env_message:
        print(f" {env_message}")

    # Chat check.
    chat_ok, chat_message = test_chat_completion(model)
    print(f"\n2. Chat完了機能チェック: {'✓' if chat_ok else '✗'}")
    if chat_message:
        # On failure the message is an error description, not a model reply,
        # so the "test response" label is printed only on success.
        label = "テスト応答: " if chat_ok else ""
        print(f" {label}{chat_message}")

    all_checks_passed = env_ok and chat_ok
    summary = '✓ 全てのチェックに成功しました' if all_checks_passed else '✗ 一部のチェックに失敗しました'
    print(f"\n総合結果: {summary}")

    return all_checks_passed
Oops, something went wrong.