File tree Expand file tree Collapse file tree 2 files changed +29
-1
lines changed Expand file tree Collapse file tree 2 files changed +29
-1
lines changed Original file line number Diff line number Diff line change @@ -58,6 +58,8 @@ and this project adheres to
58
58
reduced the amount of memory allocated by approximately 25%. (#4887 )
59
59
- Removed several memory allocations that happened during inference with discrete actions. (#4922 )
60
60
- Properly catch permission errors when writing timer files. (#4921 )
61
+ - Unexpected gRPC exceptions during training are now logged before stopping training. If you see
62
+ "noisy" logs, please let us know! (#4930 )
61
63
62
64
#### ml-agents / ml-agents-envs / gym-unity (Python)
63
65
- Fixed a bug that would cause an exception when ` RunOptions ` was deserialized via ` pickle ` . (#4842 )
Original file line number Diff line number Diff line change @@ -440,6 +440,7 @@ UnityInputProto Exchange(UnityOutputProto unityOutput)
440
440
{
441
441
return null ;
442
442
}
443
+
443
444
try
444
445
{
445
446
var message = m_Client . Exchange ( WrapMessage ( unityOutput , 200 ) ) ;
@@ -455,8 +456,33 @@ UnityInputProto Exchange(UnityOutputProto unityOutput)
455
456
QuitCommandReceived ? . Invoke ( ) ;
456
457
return message . UnityInput ;
457
458
}
458
- catch
459
+ catch ( RpcException rpcException )
460
+ {
461
+ // Log more verbose errors if they're something the user can possibly do something about.
462
+ switch ( rpcException . Status . StatusCode )
463
+ {
464
+ case StatusCode . Unavailable :
465
+ // This can happen when python disconnects. Ignore it to avoid noisy logs.
466
+ break ;
467
+ case StatusCode . ResourceExhausted :
468
+ // This happens if the message body is too large. There's no way to
469
+ // gracefully handle this, but at least we can show the message and the
470
+ // user can try to reduce the number of agents or observation sizes.
471
+ Debug . LogError ( $ "GRPC Exception: { rpcException . Message } . Disconnecting from trainer.") ;
472
+ break ;
473
+ default :
474
+ // Other unknown errors. Log at INFO level.
475
+ Debug . Log ( $ "GRPC Exception: { rpcException . Message } . Disconnecting from trainer.") ;
476
+ break ;
477
+ }
478
+ m_IsOpen = false ;
479
+ QuitCommandReceived ? . Invoke ( ) ;
480
+ return null ;
481
+ }
482
+ catch ( Exception ex )
459
483
{
484
+ // Fall-through for other error types
485
+ Debug . LogError ( $ "GRPC Exception: { ex . Message } . Disconnecting from trainer.") ;
460
486
m_IsOpen = false ;
461
487
QuitCommandReceived ? . Invoke ( ) ;
462
488
return null ;
You can’t perform that action at this time.
0 commit comments