
Commit 25fd72f

feat: gemini 3.0 pro preview added (as default gemini pro model)
refactor: code cleanup
Parent: 749bc73

19 files changed: +61 −43 lines

conf/gemini_models.json

Lines changed: 24 additions & 3 deletions

@@ -26,11 +26,11 @@
     },
     "models": [
       {
-        "model_name": "gemini-2.5-pro",
-        "friendly_name": "Gemini (Pro 2.5)",
+        "model_name": "gemini-3-pro-preview",
+        "friendly_name": "Gemini Pro 3.0 Preview",
         "aliases": [
           "pro",
-          "gemini pro",
+          "gemini3",
           "gemini-pro"
         ],
         "intelligence_score": 18,
@@ -48,6 +48,27 @@
         "allow_code_generation": true,
         "max_image_size_mb": 32.0
       },
+      {
+        "model_name": "gemini-2.5-pro",
+        "friendly_name": "Gemini Pro 2.5",
+        "aliases": [
+          "gemini-pro-2.5"
+        ],
+        "intelligence_score": 18,
+        "description": "Older Model. 1M context - Complex problems, architecture, deep analysis",
+        "context_window": 1048576,
+        "max_output_tokens": 65536,
+        "max_thinking_tokens": 32768,
+        "supports_extended_thinking": true,
+        "supports_system_prompts": true,
+        "supports_streaming": true,
+        "supports_function_calling": true,
+        "supports_json_mode": true,
+        "supports_images": true,
+        "supports_temperature": true,
+        "allow_code_generation": true,
+        "max_image_size_mb": 32.0
+      },
       {
         "model_name": "gemini-2.0-flash",
         "friendly_name": "Gemini (Flash 2.0)",

config.py

Lines changed: 7 additions & 3 deletions

@@ -43,21 +43,25 @@


 # Temperature defaults for different tool types
+# NOTE: Gemini 3.0 Pro notes suggest temperature should be set at 1.0
+# in most cases. Lowering it can affect the model's 'reasoning' abilities.
+# Newer models / inference stacks are able to handle their randomness better.
+
 # Temperature controls the randomness/creativity of model responses
 # Lower values (0.0-0.3) produce more deterministic, focused responses
 # Higher values (0.7-1.0) produce more creative, varied responses

 # TEMPERATURE_ANALYTICAL: Used for tasks requiring precision and consistency
 # Ideal for code review, debugging, and error analysis where accuracy is critical
-TEMPERATURE_ANALYTICAL = 0.2  # For code review, debugging
+TEMPERATURE_ANALYTICAL = 1.0  # For code review, debugging

 # TEMPERATURE_BALANCED: Middle ground for general conversations
 # Provides a good balance between consistency and helpful variety
-TEMPERATURE_BALANCED = 0.5  # For general chat
+TEMPERATURE_BALANCED = 1.0  # For general chat

 # TEMPERATURE_CREATIVE: Higher temperature for exploratory tasks
 # Used when brainstorming, exploring alternatives, or architectural discussions
-TEMPERATURE_CREATIVE = 0.7  # For architecture, deep thinking
+TEMPERATURE_CREATIVE = 1.0  # For architecture, deep thinking

 # Thinking Mode Defaults
 # DEFAULT_THINKING_MODE_THINKDEEP: Default thinking depth for extended reasoning tool
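All three tool-type defaults now collapse to 1.0, following the guidance in the new NOTE. A minimal sketch of what that looks like at the API boundary, assuming the google-genai SDK (which the cassettes under tests/gemini_cassettes suggest this provider records against); the repo's actual provider wiring may differ:

from google import genai
from google.genai import types

from config import TEMPERATURE_ANALYTICAL  # now 1.0; run from the repo root

client = genai.Client(api_key="YOUR_API_KEY")  # placeholder key
response = client.models.generate_content(
    model="gemini-3-pro-preview",
    contents="Review this function for bugs: ...",
    # Per the NOTE above: keep temperature at 1.0 for Gemini 3.0 Pro,
    # since lowering it can degrade the model's reasoning.
    config=types.GenerateContentConfig(temperature=TEMPERATURE_ANALYTICAL),
)
print(response.text)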

providers/gemini.py

Lines changed: 1 addition & 9 deletions

@@ -42,14 +42,6 @@ class GeminiModelProvider(RegistryBackedProviderMixin, ModelProvider):
         "max": 1.0,  # 100% of max - full thinking budget
     }

-    # Model-specific thinking token limits
-    MAX_THINKING_TOKENS = {
-        "gemini-2.0-flash": 24576,  # Same as 2.5 flash for consistency
-        "gemini-2.0-flash-lite": 0,  # No thinking support
-        "gemini-2.5-flash": 24576,  # Flash 2.5 thinking budget limit
-        "gemini-2.5-pro": 32768,  # Pro 2.5 thinking budget limit
-    }
-
     def __init__(self, api_key: str, **kwargs):
         """Initialize Gemini provider with API key and optional base URL."""
         self._ensure_registry()
@@ -124,7 +116,7 @@ def generate_content(
         prompt: str,
         model_name: str,
         system_prompt: Optional[str] = None,
-        temperature: float = 0.3,
+        temperature: float = 1.0,
         max_output_tokens: Optional[int] = None,
         thinking_mode: str = "medium",
         images: Optional[list[str]] = None,
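With the hardcoded MAX_THINKING_TOKENS dict gone, the per-model cap now lives in the registry JSON (note the "max_thinking_tokens": 32768 field on the re-added gemini-2.5-pro entry above), and the ratio table that remains in the class scales against it. A sketch of that calculation, assuming only the "max": 1.0 ratio visible in this diff; the other ratio and the helper name are illustrative:

THINKING_BUDGET_RATIOS = {
    "medium": 0.33,  # illustrative; only "max" is visible in the diff
    "max": 1.0,      # 100% of max - full thinking budget (from the diff)
}

def thinking_budget(max_thinking_tokens: int, mode: str = "medium") -> int:
    """Scale a model's registry-defined thinking cap by the requested mode."""
    return int(max_thinking_tokens * THINKING_BUDGET_RATIOS[mode])

assert thinking_budget(32768, "max") == 32768    # gemini-2.5-pro's cap
assert thinking_budget(32768, "medium") == 10813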

tests/gemini_cassettes/chat_codegen/gemini25_pro_calculator/mldev.json

Lines changed: 1 addition & 1 deletion

@@ -23,7 +23,7 @@
         }
       ],
       "generationConfig": {
-        "temperature": 0.5,
+        "temperature": 1.0,
         "candidateCount": 1,
         "thinkingConfig": {
           "thinkingBudget": 10813

tests/test_auto_mode_comprehensive.py

Lines changed: 3 additions & 3 deletions

@@ -80,7 +80,7 @@ def teardown_method(self):
                 "OPENROUTER_API_KEY": None,
             },
             {
-                "EXTENDED_REASONING": "gemini-2.5-pro",  # Pro for deep thinking
+                "EXTENDED_REASONING": "gemini-3-pro-preview",  # Gemini 3 Pro Preview for deep thinking
                 "FAST_RESPONSE": "gemini-2.5-flash",  # Flash for speed
                 "BALANCED": "gemini-2.5-flash",  # Flash as balanced
             },
@@ -122,7 +122,7 @@ def teardown_method(self):
                 "OPENROUTER_API_KEY": None,
             },
             {
-                "EXTENDED_REASONING": "gemini-2.5-pro",  # Gemini comes first in priority
+                "EXTENDED_REASONING": "gemini-3-pro-preview",  # Gemini 3 Pro Preview comes first in priority
                 "FAST_RESPONSE": "gemini-2.5-flash",  # Prefer flash for speed
                 "BALANCED": "gemini-2.5-flash",  # Prefer flash for balanced
             },
@@ -136,7 +136,7 @@ def teardown_method(self):
                 "OPENROUTER_API_KEY": None,
             },
             {
-                "EXTENDED_REASONING": "gemini-2.5-pro",  # Gemini comes first in priority
+                "EXTENDED_REASONING": "gemini-3-pro-preview",  # Gemini 3 Pro Preview comes first in priority
                 "FAST_RESPONSE": "gemini-2.5-flash",  # Prefer flash for speed
                 "BALANCED": "gemini-2.5-flash",  # Prefer flash for balanced
             },

tests/test_auto_mode_provider_selection.py

Lines changed: 3 additions & 3 deletions

@@ -59,7 +59,7 @@ def test_gemini_only_fallback_selection(self):
         balanced = ModelProviderRegistry.get_preferred_fallback_model(ToolModelCategory.BALANCED)

         # Should select appropriate Gemini models
-        assert extended_reasoning in ["gemini-2.5-pro", "pro"]
+        assert extended_reasoning in ["gemini-3-pro-preview", "gemini-2.5-pro", "pro"]
         assert fast_response in ["gemini-2.5-flash", "flash"]
         assert balanced in ["gemini-2.5-flash", "flash"]

@@ -139,7 +139,7 @@ def test_both_gemini_and_openai_priority(self):
         fast_response = ModelProviderRegistry.get_preferred_fallback_model(ToolModelCategory.FAST_RESPONSE)

         # Should prefer Gemini now (based on new provider priority: Gemini before OpenAI)
-        assert extended_reasoning == "gemini-2.5-pro"  # Gemini has higher priority now
+        assert extended_reasoning == "gemini-3-pro-preview"  # Gemini 3 Pro Preview has higher priority now

         # Should prefer Gemini for fast response
         assert fast_response == "gemini-2.5-flash"  # Gemini has higher priority now
@@ -317,7 +317,7 @@ def test_alias_resolution_before_api_calls(self):
         # Test that providers resolve aliases correctly
         test_cases = [
             ("flash", ProviderType.GOOGLE, "gemini-2.5-flash"),
-            ("pro", ProviderType.GOOGLE, "gemini-2.5-pro"),
+            ("pro", ProviderType.GOOGLE, "gemini-3-pro-preview"),  # "pro" now resolves to gemini-3-pro-preview
             ("mini", ProviderType.OPENAI, "gpt-5-mini"),  # "mini" now resolves to gpt-5-mini
             ("o3mini", ProviderType.OPENAI, "o3-mini"),
             ("grok", ProviderType.XAI, "grok-4"),

tests/test_challenge.py

Lines changed: 1 addition & 1 deletion

@@ -28,7 +28,7 @@ def test_tool_metadata(self):
         assert "reflexive agreement" in self.tool.get_description()
         assert "critical thinking" in self.tool.get_description()
         assert "reasoned analysis" in self.tool.get_description()
-        assert self.tool.get_default_temperature() == 0.2  # TEMPERATURE_ANALYTICAL
+        assert self.tool.get_default_temperature() == 1.0  # TEMPERATURE_ANALYTICAL

     def test_requires_model(self):
         """Test that challenge tool doesn't require a model"""

tests/test_config.py

Lines changed: 3 additions & 3 deletions

@@ -35,6 +35,6 @@ def test_model_config(self):

     def test_temperature_defaults(self):
         """Test temperature constants"""
-        assert TEMPERATURE_ANALYTICAL == 0.2
-        assert TEMPERATURE_BALANCED == 0.5
-        assert TEMPERATURE_CREATIVE == 0.7
+        assert TEMPERATURE_ANALYTICAL == 1.0
+        assert TEMPERATURE_BALANCED == 1.0
+        assert TEMPERATURE_CREATIVE == 1.0

tests/test_consensus.py

Lines changed: 1 addition & 1 deletion

@@ -19,7 +19,7 @@ def test_tool_metadata(self):

         assert tool.get_name() == "consensus"
         assert "consensus" in tool.get_description()
-        assert tool.get_default_temperature() == 0.2  # TEMPERATURE_ANALYTICAL
+        assert tool.get_default_temperature() == 1.0  # TEMPERATURE_ANALYTICAL
         assert tool.get_model_category() == ToolModelCategory.EXTENDED_REASONING
         assert tool.requires_model() is False  # Consensus manages its own models

tests/test_consensus_integration.py

Lines changed: 1 addition & 0 deletions

@@ -29,6 +29,7 @@
 GEMINI_REPLAY_PATH = GEMINI_REPLAY_DIR / "consensus" / "step2_gemini25_flash_against" / "mldev.json"


+@pytest.mark.integration
 @pytest.mark.asyncio
 @pytest.mark.no_mock_provider
 @pytest.mark.parametrize("openai_model", ["gpt-5", "gpt-5.1"])
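With the new marker, these replay-based consensus tests can be selected or excluded via pytest's marker filter, e.g. pytest -m integration or pytest -m "not integration" (assuming the integration marker is registered in the project's pytest configuration).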
