2626)
2727from redis .asyncio .lock import Lock
2828from redis .asyncio .parser import CommandsParser
29+ from redis .asyncio .retry import Retry
30+ from redis .backoff import default_backoff
2931from redis .client import EMPTY_RESPONSE , NEVER_DECODE , AbstractRedis
3032from redis .cluster import (
3133 PIPELINE_BLOCKED_COMMANDS ,
@@ -110,10 +112,10 @@ class RedisCluster(AbstractRedis, AbstractRedisCluster, AsyncRedisClusterCommand
110112 :param startup_nodes:
111113 | :class:`~.ClusterNode` to used as a startup node
112114 :param require_full_coverage:
113- | When set to ``False``: the client will not require a full coverage of the
114- slots. However, if not all slots are covered, and at least one node has
115- ``cluster-require-full-coverage`` set to ``yes``, the server will throw a
116- :class:`~.ClusterDownError` for some key-based commands.
115+ | When set to ``False``: the client will not require a full coverage of
116+ the slots. However, if not all slots are covered, and at least one node
117+ has ``cluster-require-full-coverage`` set to ``yes``, the server will throw
118+ a :class:`~.ClusterDownError` for some key-based commands.
117119 | When set to ``True``: all slots must be covered to construct the cluster
118120 client. If not all slots are covered, :class:`~.RedisClusterException` will be
119121 thrown.
@@ -136,7 +138,10 @@ class RedisCluster(AbstractRedis, AbstractRedisCluster, AsyncRedisClusterCommand
136138 or :class:`~.ConnectionError` or :class:`~.ClusterDownError` are encountered
137139 :param connection_error_retry_attempts:
138140 | Number of times to retry before reinitializing when :class:`~.TimeoutError`
139- or :class:`~.ConnectionError` are encountered
141+ or :class:`~.ConnectionError` are encountered.
142+ The default backoff strategy will be set if Retry object is not passed (see
143+ default_backoff in backoff.py). To change it, pass a custom Retry object
144+ using the "retry" keyword.
140145 :param max_connections:
141146 | Maximum number of connections per node. If there are no free connections & the
142147 maximum number of connections are already created, a
@@ -214,9 +219,9 @@ def __init__(
214219 startup_nodes : Optional [List ["ClusterNode" ]] = None ,
215220 require_full_coverage : bool = True ,
216221 read_from_replicas : bool = False ,
217- reinitialize_steps : int = 10 ,
222+ reinitialize_steps : int = 5 ,
218223 cluster_error_retry_attempts : int = 3 ,
219- connection_error_retry_attempts : int = 5 ,
224+ connection_error_retry_attempts : int = 3 ,
220225 max_connections : int = 2 ** 31 ,
221226 # Client related kwargs
222227 db : Union [str , int ] = 0 ,
@@ -235,6 +240,8 @@ def __init__(
235240 socket_keepalive : bool = False ,
236241 socket_keepalive_options : Optional [Mapping [int , Union [int , bytes ]]] = None ,
237242 socket_timeout : Optional [float ] = None ,
243+ retry : Optional ["Retry" ] = None ,
244+ retry_on_error : Optional [List [Exception ]] = None ,
238245 # SSL related kwargs
239246 ssl : bool = False ,
240247 ssl_ca_certs : Optional [str ] = None ,
@@ -282,6 +289,7 @@ def __init__(
282289 "socket_keepalive" : socket_keepalive ,
283290 "socket_keepalive_options" : socket_keepalive_options ,
284291 "socket_timeout" : socket_timeout ,
292+ "retry" : retry ,
285293 }
286294
287295 if ssl :
@@ -302,6 +310,18 @@ def __init__(
302310 # Call our on_connect function to configure READONLY mode
303311 kwargs ["redis_connect_func" ] = self .on_connect
304312
313+ self .retry = retry
314+ if retry or retry_on_error or connection_error_retry_attempts > 0 :
315+ # Set a retry object for all cluster nodes
316+ self .retry = retry or Retry (
317+ default_backoff (), connection_error_retry_attempts
318+ )
319+ if not retry_on_error :
320+ # Default errors for retrying
321+ retry_on_error = [ConnectionError , TimeoutError ]
322+ self .retry .update_supported_errors (retry_on_error )
323+ kwargs .update ({"retry" : self .retry })
324+
305325 kwargs ["response_callbacks" ] = self .__class__ .RESPONSE_CALLBACKS .copy ()
306326 self .connection_kwargs = kwargs
307327
@@ -323,7 +343,6 @@ def __init__(
323343 self .reinitialize_steps = reinitialize_steps
324344 self .cluster_error_retry_attempts = cluster_error_retry_attempts
325345 self .connection_error_retry_attempts = connection_error_retry_attempts
326-
327346 self .reinitialize_counter = 0
328347 self .commands_parser = CommandsParser ()
329348 self .node_flags = self .__class__ .NODE_FLAGS .copy ()
@@ -481,6 +500,16 @@ def get_connection_kwargs(self) -> Dict[str, Optional[Any]]:
481500 """Get the kwargs passed to :class:`~redis.asyncio.connection.Connection`."""
482501 return self .connection_kwargs
483502
503+ def get_retry (self ) -> Optional ["Retry" ]:
504+ return self .retry
505+
506+ def set_retry (self , retry : "Retry" ) -> None :
507+ self .retry = retry
508+ for node in self .get_nodes ():
509+ node .connection_kwargs .update ({"retry" : retry })
510+ for conn in node ._connections :
511+ conn .retry = retry
512+
484513 def set_response_callback (self , command : str , callback : ResponseCallbackT ) -> None :
485514 """Set a custom response callback."""
486515 self .response_callbacks [command ] = callback
@@ -618,9 +647,11 @@ async def execute_command(self, *args: EncodableT, **kwargs: Any) -> Any:
618647 if passed_targets and not self ._is_node_flag (passed_targets ):
619648 target_nodes = self ._parse_target_nodes (passed_targets )
620649 target_nodes_specified = True
621- retry_attempts = 1
650+ retry_attempts = 0
622651
623- for _ in range (retry_attempts ):
652+ # Add one for the first execution
653+ execute_attempts = 1 + retry_attempts
654+ for _ in range (execute_attempts ):
624655 if self ._initialize :
625656 await self .initialize ()
626657 try :
@@ -658,25 +689,21 @@ async def execute_command(self, *args: EncodableT, **kwargs: Any) -> Any:
658689 )
659690 return dict (zip (keys , values ))
660691 except Exception as e :
661- if type (e ) in self .__class__ .ERRORS_ALLOW_RETRY :
662- # The nodes and slots cache were reinitialized.
692+ if retry_attempts > 0 and type (e ) in self .__class__ .ERRORS_ALLOW_RETRY :
693+ # The nodes and slots cache were should be reinitialized.
663694 # Try again with the new cluster setup.
664- exception = e
695+ retry_attempts -= 1
696+ continue
665697 else :
666- # All other errors should be raised.
667- raise
668-
669- # If it fails the configured number of times then raise exception back
670- # to caller of this method
671- raise exception
698+ # raise the exception
699+ raise e
672700
673701 async def _execute_command (
674702 self , target_node : "ClusterNode" , * args : Union [KeyT , EncodableT ], ** kwargs : Any
675703 ) -> Any :
676704 asking = moved = False
677705 redirect_addr = None
678706 ttl = self .RedisClusterRequestTTL
679- connection_error_retry_counter = 0
680707
681708 while ttl > 0 :
682709 ttl -= 1
@@ -695,25 +722,18 @@ async def _execute_command(
695722 moved = False
696723
697724 return await target_node .execute_command (* args , ** kwargs )
698- except BusyLoadingError :
725+ except (BusyLoadingError , MaxConnectionsError ):
726+ raise
727+ except (ConnectionError , TimeoutError ):
728+ # Connection retries are being handled in the node's
729+ # Retry object.
730+ # Remove the failed node from the startup nodes before we try
731+ # to reinitialize the cluster
732+ self .nodes_manager .startup_nodes .pop (target_node .name , None )
733+ # Hard force of reinitialize of the node/slots setup
734+ # and try again with the new setup
735+ await self .close ()
699736 raise
700- except (ConnectionError , TimeoutError ) as e :
701- # Give the node 0.25 seconds to get back up and retry again with the
702- # same node and configuration. After the defined number of attempts, try
703- # to reinitialize the cluster and try again.
704- connection_error_retry_counter += 1
705- if (
706- connection_error_retry_counter
707- < self .connection_error_retry_attempts
708- ):
709- await asyncio .sleep (0.25 )
710- else :
711- if isinstance (e , MaxConnectionsError ):
712- raise
713- # Hard force of reinitialize of the node/slots setup
714- # and try again with the new setup
715- await self .close ()
716- raise
717737 except ClusterDownError :
718738 # ClusterDownError can occur during a failover and to get
719739 # self-healed, we will try to reinitialize the cluster layout
@@ -1145,26 +1165,11 @@ async def initialize(self) -> None:
11451165 )
11461166 cluster_slots = await startup_node .execute_command ("CLUSTER SLOTS" )
11471167 startup_nodes_reachable = True
1148- except (ConnectionError , TimeoutError ) as e :
1168+ except Exception as e :
1169+ # Try the next startup node.
1170+ # The exception is saved and raised only if we have no more nodes.
11491171 exception = e
11501172 continue
1151- except ResponseError as e :
1152- # Isn't a cluster connection, so it won't parse these
1153- # exceptions automatically
1154- message = e .__str__ ()
1155- if "CLUSTERDOWN" in message or "MASTERDOWN" in message :
1156- continue
1157- else :
1158- raise RedisClusterException (
1159- 'ERROR sending "cluster slots" command to redis '
1160- f"server: { startup_node } . error: { message } "
1161- )
1162- except Exception as e :
1163- message = e .__str__ ()
1164- raise RedisClusterException (
1165- 'ERROR sending "cluster slots" command to redis '
1166- f"server { startup_node .name } . error: { message } "
1167- )
11681173
11691174 # CLUSTER SLOTS command results in the following output:
11701175 # [[slot_section[from_slot,to_slot,master,replica1,...,replicaN]]]
@@ -1245,8 +1250,8 @@ async def initialize(self) -> None:
12451250
12461251 if not startup_nodes_reachable :
12471252 raise RedisClusterException (
1248- "Redis Cluster cannot be connected. Please provide at least "
1249- "one reachable node. "
1253+ f "Redis Cluster cannot be connected. Please provide at least "
1254+ f "one reachable node: { str ( exception ) } "
12501255 ) from exception
12511256
12521257 # Check if the slots are not fully covered
0 commit comments