当我在本地通过gpt-oss-20b(本地资源有限,vllm最大只开到18k的窗口大小)执行任务出现压缩异常,发现ActionCompressor应该是针对128k配置的阈值,建议是不是改成动态自适应窗口:
def compress_if_needed(
self,
action_history: List[Dict],
max_context_window: int,
thinking: str = "",
task_input: str = "",
save_callback=None # 添加保存回调,确保压缩后立即保存
) -> List[Dict]:
"""
检查并压缩历史动作
策略:
1. 保留最新1条action(完整或压缩大字段)
2. 之前的所有action总结为一个summary_action
3. 基于 thinking 和 task_input 判断哪些信息有效、哪些无关
Args:
action_history: 动作历史
max_context_window: 最大窗口大小
thinking: 当前的 thinking 内容(包含 todolist 和计划)
task_input: 任务需求描述
Returns:
压缩后的action_history
"""
if not action_history:
return []
# 如果只有一条
if len(action_history) == 1:
# 检查是否需要压缩字段
return [self._compress_action_fields(action_history[0], max_context_window // 2)]
# 分离最新和历史
recent_action = action_history[-1]
historical_actions = action_history[:-1]
# 计算整体token数
total_text = self._actions_to_xml(action_history)
total_tokens = self.count_tokens(total_text + thinking + task_input)
# 如果不超限,不压缩
if total_tokens <= max_context_window - 20000:
return action_history
safe_print(f"🔄 历史动作需要压缩: {total_tokens} tokens > {max_context_window - 20000}")
# 压缩策略:
# 1. 历史 → 基于 thinking 和 task_input 智能总结为5k tokens
# 2. 最新 → 压缩为max_window的50%
summary_action = self._summarize_historical_xml(
self._actions_to_xml(historical_actions),
target_tokens=5000, # 历史总结固定5k tokens
thinking=thinking,
task_input=task_input,
max_context_window=max_context_window,
actions=historical_actions # 传递原始 actions(用于提取图片)
)
# 压缩最新action的大字段(50% of max_window)
compressed_recent = self._compress_action_fields(
recent_action,
int(max_context_window * 0.5), # 80000 * 0.5 = 40000 tokens
thinking=thinking,
task_input=task_input,
max_context_window=max_context_window
)
result = [summary_action, compressed_recent]
# 验证压缩效果
result_xml = self._actions_to_xml(result)
result_tokens = self.count_tokens(result_xml)
safe_print(
f"✅ 压缩完成: {total_tokens} tokens → {result_tokens} tokens (压缩比: {result_tokens / total_tokens * 100:.1f}%)")
return result
==>
def compress_if_needed(
self,
action_history: List[Dict],
max_context_window: int,
thinking: str = "",
task_input: str = "",
save_callback=None
) -> List[Dict]:
"""
自适应所有模型窗口:18k、32k、64k、128k、200k 都通用
全部使用比例,不写死硬编码
"""
if not action_history:
return []
# ===================== 自适应参数(全模型通用)=====================
SAFETY_MARGIN_RATIO = 0.2 # 安全余量 20%
SUMMARY_TOKENS_RATIO = 0.1 # 历史总结占 10% 窗口
RECENT_FIELD_MAX_RATIO = 0.15 # 最新动作字段最大 15% 窗口
# 自动计算当前模型的合理阈值
safety_margin = int(max_context_window * SAFETY_MARGIN_RATIO)
summary_target_tokens = int(max_context_window * SUMMARY_TOKENS_RATIO)
recent_field_max_tokens = int(max_context_window * RECENT_FIELD_MAX_RATIO)
# =================================================================
# 单条动作直接压缩字段
if len(action_history) == 1:
return [self._compress_action_fields(action_history[0], recent_field_max_tokens)]
# 分离最新一条 + 历史
recent_action = action_history[-1]
historical_actions = action_history[:-1]
# 计算总 token
total_text = self._actions_to_xml(action_history)
total_tokens = self.count_tokens(total_text + thinking + task_input)
# 未超限不压缩
if total_tokens <= max_context_window - safety_margin:
return action_history
# 开始压缩
summary_action = self._summarize_historical_xml(
self._actions_to_xml(historical_actions),
target_tokens=summary_target_tokens,
thinking=thinking,
task_input=task_input,
max_context_window=max_context_window,
actions=historical_actions
)
compressed_recent = self._compress_action_fields(
recent_action,
recent_field_max_tokens,
thinking=thinking,
task_input=task_input,
max_context_window=max_context_window
)
return [summary_action, compressed_recent]
当我在本地通过gpt-oss-20b(本地资源有限,vllm最大只开到18k的窗口大小)执行任务出现压缩异常,发现ActionCompressor应该是针对128k配置的阈值,建议是不是改成动态自适应窗口:
def compress_if_needed(
self,
action_history: List[Dict],
max_context_window: int,
thinking: str = "",
task_input: str = "",
save_callback=None # 添加保存回调,确保压缩后立即保存
) -> List[Dict]:
"""
检查并压缩历史动作
==>
def compress_if_needed(
self,
action_history: List[Dict],
max_context_window: int,
thinking: str = "",
task_input: str = "",
save_callback=None
) -> List[Dict]:
"""
自适应所有模型窗口:18k、32k、64k、128k、200k 都通用
全部使用比例,不写死硬编码
"""
if not action_history:
return []