@@ -38,7 +38,7 @@ def __init__(self, client: CrateDBClient):
3838 self .shards : List [ShardInfo ] = []
3939
4040 # Initialize session-based caches for performance.
41- self ._zone_conflict_cache : Dict [Tuple [str , int , str ], Union [str , None ]] = {}
41+ self ._zone_conflict_cache : Dict [Tuple [str , str , int , str ], Union [str , None ]] = {}
4242 self ._node_lookup_cache : Dict [str , Union [NodeInfo , None ]] = {}
4343 self ._target_nodes_cache : Dict [Tuple [float , frozenset [Any ], float , float ], List [NodeInfo ]] = {}
4444 self ._cache_hits = 0
@@ -183,8 +183,6 @@ def find_nodes_with_capacity(
183183 free_space_gb = node .available_space_gb
184184 if free_space_gb >= (required_space_gb + min_free_space_gb ):
185185 available_nodes .append (node )
186- else :
187- continue
188186
189187 # Sort by available space (most space first) - prioritize nodes with more free space
190188 available_nodes .sort (key = lambda n : n .available_space_gb , reverse = True )
@@ -206,7 +204,7 @@ def generate_rebalancing_recommendations(
206204 # Get moveable shards (only healthy ones for actual operations)
207205 moveable_shards = self .find_moveable_shards (constraints .min_size , constraints .max_size , constraints .table_name )
208206
209- print (
207+ logger . info (
210208 f"Analyzing { len (moveable_shards )} candidate shards "
211209 f"in size range { constraints .min_size } -{ constraints .max_size } GB..."
212210 )
@@ -239,12 +237,11 @@ def generate_rebalancing_recommendations(
239237 # Optimize processing: if filtering by source node, only process those shards
240238 if constraints .source_node :
241239 processing_shards = [s for s in moveable_shards if s .node_name == constraints .source_node ]
242- print (f"Focusing on { len (processing_shards )} shards from node { constraints .source_node } " )
240+ logger . info (f"Focusing on { len (processing_shards )} shards from node { constraints .source_node } " )
243241 else :
244242 processing_shards = moveable_shards
245243
246244 # Generate move recommendations
247- safe_recommendations = 0 # noqa: F841
248245 total_evaluated = 0
249246
250247 for i , shard in enumerate (processing_shards ):
@@ -368,12 +365,12 @@ def generate_rebalancing_recommendations(
368365
369366 if len (processing_shards ) > 100 :
370367 print () # New line after progress dots
371- print (f"Generated { len (recommendations )} move recommendations (evaluated { total_evaluated } shards)" )
372- print (f"Performance: { self .get_cache_stats ()} " )
368+ logger . info (f"Generated { len (recommendations )} move recommendations (evaluated { total_evaluated } shards)" )
369+ logger . info (f"Performance: { self .get_cache_stats ()} " )
373370 return recommendations
374371
375372 def validate_move_safety (
376- self , recommendation : ShardRelocationResponse , max_disk_usage_percent : float = 90.0
373+ self , recommendation : ShardRelocationResponse , max_disk_usage_percent : float = 90.0 , buffer_gb : float = 50.0
377374 ) -> Tuple [bool , str ]:
378375 """Validate that a move recommendation is safe to execute"""
379376 # Find target node (with caching)
@@ -388,7 +385,7 @@ def validate_move_safety(
388385 return False , zone_conflict
389386
390387 # Check available space
391- required_space_gb = recommendation .size_gb + 50 # 50GB buffer
388+ required_space_gb = recommendation .size_gb + buffer_gb
392389 if target_node .available_space_gb < required_space_gb :
393390 return (
394391 False ,
@@ -423,7 +420,7 @@ def _check_zone_conflict_cached(self, recommendation: ShardRelocationResponse) -
423420 """Check zone conflicts with caching"""
424421 # Create cache key: table, shard, target zone
425422 target_zone = self ._get_node_zone (recommendation .to_node )
426- cache_key = (recommendation .table_name , recommendation .shard_id , target_zone )
423+ cache_key = (recommendation .schema_name , recommendation . table_name , recommendation .shard_id , target_zone )
427424
428425 if cache_key in self ._zone_conflict_cache :
429426 self ._cache_hits += 1
@@ -813,11 +810,14 @@ def plan_node_decommission(self, node_name: str, min_free_space_gb: float = 100.
813810 # Determine feasibility
814811 feasible = len (infeasible_moves ) == 0
815812
813+ # Safety margin for cluster capacity after decommission
814+ capacity_safety_margin = 1.2 # 20% buffer
815+
816816 # Add capacity warnings
817817 if feasible :
818- # Check if remaining cluster capacity is sufficient after decommission
818+ # Check if the remaining cluster capacity is sufficient after decommission
819819 remaining_capacity = sum (n .available_space_gb for n in self .nodes if n .name != node_name )
820- if remaining_capacity < total_size_gb * 1.2 : # 20% safety margin
820+ if remaining_capacity < total_size_gb * capacity_safety_margin :
821821 warnings .append (
822822 f"Low remaining capacity after decommission. "
823823 f"Only { remaining_capacity :.1f} GB available for { total_size_gb :.1f} GB of data"
@@ -833,7 +833,7 @@ def plan_node_decommission(self, node_name: str, min_free_space_gb: float = 100.
833833 "recommendations" : move_plan ,
834834 "infeasible_moves" : infeasible_moves ,
835835 "warnings" : warnings ,
836- "estimated_time_hours" : len (move_plan ) * 0.1 , # Rough estimate: 6 minutes per move
836+ "estimated_time_hours" : len (move_plan ) * 0.1 , # Rough estimate: 0.1 hours (6 minutes) per move
837837 "message" : "Decommission plan generated" if feasible else "Decommission not currently feasible" ,
838838 }
839839
0 commit comments