Skip to content

refactor: _get_column_info 함수 최적화 #119

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 15, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 24 additions & 14 deletions llm_utils/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,30 +93,29 @@ def _get_table_info(max_workers: int = 8) -> Dict[str, str]:
return table_info


def _get_column_info(table_name: str, max_workers: int = 8) -> List[Dict[str, str]]:
def _get_column_info(
table_name: str, urn_table_mapping: Dict[str, str], max_workers: int = 8
) -> List[Dict[str, str]]:
"""table_name에 해당하는 컬럼 이름과 설명을 가져오는 함수

Args:
table_name (str): 테이블 이름
urn_table_mapping (Dict[str, str]): URN-테이블명 매핑 딕셔너리
max_workers (int, optional): 병렬 처리에 사용할 최대 쓰레드 수. Defaults to 8.

Returns:
List[Dict[str, str]]: 컬럼 정보 리스트
"""
fetcher = _get_fetcher()
urns = fetcher.get_urns()
# 해당 테이블의 URN 직접 찾기
target_urn = urn_table_mapping.get(table_name)
if not target_urn:
return []

results = parallel_process(
urns,
lambda urn: _process_column_info(urn, table_name, fetcher),
max_workers=max_workers,
show_progress=False,
)
# Fetcher 생성 및 컬럼 정보 가져오기
fetcher = _get_fetcher()
column_info = fetcher.get_column_names_and_descriptions(target_urn)

for result in results:
if result:
return result
return []
return column_info


def get_info_from_db(max_workers: int = 8) -> List[Document]:
Expand All @@ -130,9 +129,20 @@ def get_info_from_db(max_workers: int = 8) -> List[Document]:
"""
table_info = _get_table_info(max_workers=max_workers)

# URN-테이블명 매핑을 한 번만 생성
fetcher = _get_fetcher()
urns = list(fetcher.get_urns())
urn_table_mapping = {}
for urn in urns:
table_name = fetcher.get_table_name(urn)
if table_name:
urn_table_mapping[table_name] = urn

def process_table_info(item: tuple[str, str]) -> str:
table_name, table_description = item
column_info = _get_column_info(table_name, max_workers=max_workers)
column_info = _get_column_info(
table_name, urn_table_mapping, max_workers=max_workers
)
column_info_str = "\n".join(
[
f"{col['column_name']}: {col['column_description']}"
Expand Down