Skip to content

历史会话压缩阈值问题 #90

@hjwang-wang

Description

@hjwang-wang

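我在本地使用 gpt-oss-20b 执行任务时出现了压缩异常(本地资源有限,vLLM 的上下文窗口最大只能开到 18k)。排查后发现,ActionCompressor 中的阈值(固定预留 20000 tokens、历史总结固定 5000 tokens)应该是按 128k 窗口配置的:当窗口小于 20k 时,`max_context_window - 20000` 为负数,只要历史动作多于一条就一定会触发压缩。建议改成按窗口大小动态自适应。当前实现如下: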
def compress_if_needed(
self,
action_history: List[Dict],
max_context_window: int,
thinking: str = "",
task_input: str = "",
save_callback=None # save callback (original note: make sure state is saved right after compression); NOTE(review): never referenced in this function
) -> List[Dict]:
"""
Check the action history and compress it when it is too large.

    Behaviour visible in this snippet:
    1. An empty history returns [].
    2. A single action is passed through _compress_action_fields with a
       budget of max_context_window // 2.
    3. Otherwise the token count of the whole history (rendered by
       _actions_to_xml) plus thinking and task_input is compared with
       max_context_window - 20000; at or below that limit the history is
       returned unchanged.
    4. Above the limit, every action except the last is summarized into a
       single summary action (target: 5000 tokens) and the last action is
       passed through _compress_action_fields with a budget of 50% of
       max_context_window.

    Args:
        action_history: Action history; the last element is treated as the
            most recent action.
        max_context_window: Maximum context window size, compared against
            count_tokens() results.
        thinking: Current thinking content (per the original note, it
            contains the todo list and plan).
        task_input: Description of the task requirements.
        save_callback: Accepted but not used by the code shown here.

    Returns:
        The compressed action_history, or the input list itself when no
        compression was needed.
    """
    if not action_history:
        return []

    # Only one action in the history
    if len(action_history) == 1:
        # Let the helper decide whether its fields need compressing
        return [self._compress_action_fields(action_history[0], max_context_window // 2)]

    # Split into the most recent action and the earlier history
    recent_action = action_history[-1]
    historical_actions = action_history[:-1]

    # Token count of the whole history plus thinking and task_input
    total_text = self._actions_to_xml(action_history)
    total_tokens = self.count_tokens(total_text + thinking + task_input)

    # Within the limit: do not compress
    # NOTE(review): the 20000-token margin is a fixed constant; for
    # max_context_window < 20000 the limit is negative, so this branch is
    # never taken for a non-negative token count and compression always runs.
    if total_tokens <= max_context_window - 20000:
        return action_history

    safe_print(f"🔄 历史动作需要压缩: {total_tokens} tokens > {max_context_window - 20000}")

    # Compression strategy:
    # 1. History -> summarized to 5k tokens, guided by thinking and task_input
    # 2. Most recent action -> compressed to 50% of max_window

    summary_action = self._summarize_historical_xml(
        self._actions_to_xml(historical_actions),
        target_tokens=5000,  # history summary is fixed at 5k tokens
        thinking=thinking,
        task_input=task_input,
        max_context_window=max_context_window,
        actions=historical_actions  # pass the original actions (per the original note: used to extract images)
    )

    # Compress the large fields of the most recent action (50% of max_window)
    compressed_recent = self._compress_action_fields(
        recent_action,
        int(max_context_window * 0.5),  # e.g. 80000 * 0.5 = 40000 tokens
        thinking=thinking,
        task_input=task_input,
        max_context_window=max_context_window
    )

    result = [summary_action, compressed_recent]

    # Report the effect of the compression
    result_xml = self._actions_to_xml(result)
    result_tokens = self.count_tokens(result_xml)
    safe_print(
        f"✅ 压缩完成: {total_tokens} tokens → {result_tokens} tokens (压缩比: {result_tokens / total_tokens * 100:.1f}%)")

    return result

==> 建议改为:

def compress_if_needed(
    self,
    action_history: List[Dict],
    max_context_window: int,
    thinking: str = "",
    task_input: str = "",
    save_callback=None,
) -> List[Dict]:
    """Compress the action history with budgets proportional to the window.

    Every threshold is a ratio of ``max_context_window`` instead of a fixed
    token count, so the same code works for small windows (e.g. 18k) as well
    as large ones (32k, 64k, 128k, 200k):

    * safety margin: 20% of the window is kept free,
    * history summary target: 10% of the window,
    * field budget for the most recent action: 15% of the window.

    Outcomes:

    * empty history -> ``[]``;
    * a single action -> a one-element list holding the result of
      ``_compress_action_fields`` with the 15% budget;
    * total tokens (history XML + ``thinking`` + ``task_input``) within 80%
      of the window -> the input list itself, unchanged;
    * otherwise -> ``[summary_action, compressed_recent]``.

    Args:
        action_history: Action history; the last element is treated as the
            most recent action.
        max_context_window: Context window size, in the unit returned by
            ``count_tokens``.
        thinking: Current thinking content; counted toward the total and
            forwarded to the compression helpers.
        task_input: Task description; counted toward the total and forwarded
            to the compression helpers.
        save_callback: Accepted for interface compatibility; not used here.

    Returns:
        The compressed action history, or ``action_history`` itself when no
        compression was needed.
    """
    if not action_history:
        return []

    # Ratios of the context window; nothing below is a hard-coded token count.
    safety_margin_ratio = 0.2
    summary_tokens_ratio = 0.1
    recent_field_max_ratio = 0.15

    safety_margin = int(max_context_window * safety_margin_ratio)
    summary_target_tokens = int(max_context_window * summary_tokens_ratio)
    recent_field_max_tokens = int(max_context_window * recent_field_max_ratio)

    # A single action has no history to summarize: only bound its fields.
    if len(action_history) == 1:
        return [
            self._compress_action_fields(action_history[0], recent_field_max_tokens)
        ]

    recent_action = action_history[-1]
    historical_actions = action_history[:-1]

    total_text = self._actions_to_xml(action_history)
    total_tokens = self.count_tokens(total_text + thinking + task_input)

    # Still inside the usable part of the window: leave the history alone.
    threshold = max_context_window - safety_margin
    if total_tokens <= threshold:
        return action_history

    # Same progress message as the current implementation, but reporting the
    # window-relative threshold (assumes safe_print is available at module
    # level, as it is for the current implementation).
    safe_print(f"🔄 历史动作需要压缩: {total_tokens} tokens > {threshold}")

    summary_action = self._summarize_historical_xml(
        self._actions_to_xml(historical_actions),
        target_tokens=summary_target_tokens,
        thinking=thinking,
        task_input=task_input,
        max_context_window=max_context_window,
        actions=historical_actions,
    )

    compressed_recent = self._compress_action_fields(
        recent_action,
        recent_field_max_tokens,
        thinking=thinking,
        task_input=task_input,
        max_context_window=max_context_window,
    )

    result = [summary_action, compressed_recent]

    # Report how much the compression actually saved. total_tokens can only
    # be zero here for a non-positive window, but guard the division anyway.
    result_tokens = self.count_tokens(self._actions_to_xml(result))
    ratio = result_tokens / total_tokens * 100 if total_tokens else 0.0
    safe_print(
        f"✅ 压缩完成: {total_tokens} tokens → {result_tokens} tokens (压缩比: {ratio:.1f}%)"
    )

    return result

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions