历史会话压缩阈值问题

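我在本地通过 gpt-oss-20b 执行任务时出现了压缩异常（本地资源有限，vLLM 的上下文窗口最大只能开到 18k）。排查后发现 ActionCompressor 里的阈值应该是按 128k 窗口配置的：触发条件写死为 `max_context_window - 20000`，历史总结固定为 5000 tokens。当窗口只有 18k 时，`max_context_window - 20000` 为负数，于是每次调用都会触发压缩。建议改成随窗口大小动态自适应的阈值。当前实现如下：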
def compress_if_needed(
            self,
            action_history: List[Dict],
            max_context_window: int,
            thinking: str = "",
            task_input: str = "",
            save_callback=None  # save hook meant to persist right after compression; not invoked in this body
    ) -> List[Dict]:
        """
        Check the action history and compress it when it is too large.

        Strategy:
        1. Keep the most recent action, passing it through field compression.
        2. Collapse every earlier action into a single summary action.
        3. Hand ``thinking`` and ``task_input`` to the helpers so they can
           judge which information is still relevant.

        Compression is triggered when the token count of the serialized
        history plus ``thinking`` plus ``task_input`` exceeds
        ``max_context_window - 20000``. The summary target is a fixed
        5000 tokens, independent of the window size.

        Args:
            action_history: The recorded actions, oldest first.
            max_context_window: Maximum context window size, in tokens.
            thinking: Current thinking content (todo list and plan).
            task_input: Description of the task requirements.
            save_callback: Accepted for the caller's benefit; unused here.

        Returns:
            ``[]`` for an empty history; a one-element list holding the
            field-compressed action when there is exactly one action; the
            original ``action_history`` object when it is within budget;
            otherwise ``[summary_action, compressed_latest_action]``.
        """
        if not action_history:
            return []

        # A lone action is never summarized: it only goes through field
        # compression, with half of the window passed as the limit.
        if len(action_history) == 1:
            only_action = action_history[0]
            return [self._compress_action_fields(only_action, max_context_window // 2)]

        # Measure everything that will share the context window.
        history_xml = self._actions_to_xml(action_history)
        total_tokens = self.count_tokens(history_xml + thinking + task_input)

        # Fixed 20000-token reserve; within budget means nothing to do.
        token_budget = max_context_window - 20000
        if total_tokens <= token_budget:
            return action_history

        safe_print(f"🔄 历史动作需要压缩: {total_tokens} tokens > {token_budget}")

        # Split into "everything before" and "the latest action".
        earlier_actions = action_history[:-1]
        latest_action = action_history[-1]

        # Earlier actions -> one summary, targeted at a fixed 5000 tokens.
        # The raw actions are passed along as well (the original note says
        # they are used for extracting images).
        earlier_xml = self._actions_to_xml(earlier_actions)
        summary_action = self._summarize_historical_xml(
            earlier_xml,
            target_tokens=5000,
            thinking=thinking,
            task_input=task_input,
            max_context_window=max_context_window,
            actions=earlier_actions
        )

        # Latest action -> large fields compressed with 50% of the window as
        # the limit (e.g. 80000 * 0.5 = 40000 tokens).
        latest_limit = int(max_context_window * 0.5)
        compressed_latest = self._compress_action_fields(
            latest_action,
            latest_limit,
            thinking=thinking,
            task_input=task_input,
            max_context_window=max_context_window
        )

        compressed = [summary_action, compressed_latest]

        # Report how effective the compression was.
        compressed_tokens = self.count_tokens(self._actions_to_xml(compressed))
        ratio_pct = compressed_tokens / total_tokens * 100
        safe_print(
            f"✅ 压缩完成: {total_tokens} tokens → {compressed_tokens} tokens (压缩比: {ratio_pct:.1f}%)")

        return compressed

==> 建议修改为：

def compress_if_needed(
    self,
    action_history: List[Dict],
    max_context_window: int,
    thinking: str = "",
    task_input: str = "",
    save_callback=None
) -> List[Dict]:
    """
    Compress the action history using budgets scaled to the context window.

    Every threshold is a fraction of ``max_context_window`` rather than a
    fixed token count, so the same logic is meant to apply to 18k, 32k,
    64k, 128k or 200k windows alike:

    * 20% of the window is held back as a safety margin,
    * 10% of the window is the target size of the history summary,
    * 15% of the window is the limit passed to field compression.

    Args:
        action_history: The recorded actions, oldest first.
        max_context_window: Maximum context window size, in tokens.
        thinking: Current thinking content, counted toward the total and
            forwarded to the helpers.
        task_input: Task description, counted toward the total and
            forwarded to the helpers.
        save_callback: Accepted but not used in this body.

    Returns:
        ``[]`` for an empty history; a one-element list holding the
        field-compressed action when there is exactly one action; the
        original ``action_history`` object when the total fits inside the
        window minus the safety margin; otherwise
        ``[summary_action, compressed_latest_action]``.
    """
    if not action_history:
        return []

    # Window-relative budgets, in order: safety margin (20%), summary
    # target (10%), per-action field limit (15%).
    margin_tokens, summary_budget, field_budget = (
        int(max_context_window * share) for share in (0.2, 0.1, 0.15)
    )

    # A lone action is only field-compressed, never summarized.
    if len(action_history) == 1:
        only_action = action_history[0]
        return [self._compress_action_fields(only_action, field_budget)]

    # Count everything that has to share the window.
    history_xml = self._actions_to_xml(action_history)
    used_tokens = self.count_tokens(history_xml + thinking + task_input)

    # Still inside the window once the margin is reserved: leave untouched.
    if used_tokens <= max_context_window - margin_tokens:
        return action_history

    earlier_actions = action_history[:-1]
    latest_action = action_history[-1]

    # Everything before the latest action collapses into one summary.
    earlier_xml = self._actions_to_xml(earlier_actions)
    summary_action = self._summarize_historical_xml(
        earlier_xml,
        target_tokens=summary_budget,
        thinking=thinking,
        task_input=task_input,
        max_context_window=max_context_window,
        actions=earlier_actions
    )

    # The latest action is kept, with its fields compressed to the budget.
    compressed_latest = self._compress_action_fields(
        latest_action,
        field_budget,
        thinking=thinking,
        task_input=task_input,
        max_context_window=max_context_window
    )

    return [summary_action, compressed_latest]

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

历史会话压缩阈值问题 #90

Metadata

Assignees

Labels

Projects

Milestone

Relationships

Development

历史会话压缩阈值问题 #90

Description

Metadata

Metadata

Assignees

Labels

Projects

Milestone

Relationships

Development

Issue actions